diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..b731245aeacfbe762aae5ef15ad08318cfaf1c2c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,393 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_001.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_002.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_003.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_004.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_005.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_006.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_007.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_008.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_009.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_010.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_011.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_012.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_013.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_014.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_015.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_016.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_017.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_018.mp4 filter=lfs diff=lfs merge=lfs -text 
+API_Transformers/messi/Clips_30s/messi_part_019.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_020.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_021.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_022.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_023.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_024.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_025.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_026.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_027.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_028.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_029.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_030.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_031.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_032.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_033.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_034.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_035.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_036.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_037.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_038.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_039.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_040.mp4 filter=lfs diff=lfs merge=lfs -text 
+API_Transformers/messi/Clips_30s/messi_part_041.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_042.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_043.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_044.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_045.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_046.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_047.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_048.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_049.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_050.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_051.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_052.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_053.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_054.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_055.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_056.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_057.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_058.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_059.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_060.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_061.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_062.mp4 filter=lfs diff=lfs merge=lfs -text 
+API_Transformers/messi/Clips_30s/messi_part_063.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_064.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_065.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_066.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_067.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_068.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_069.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_070.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_071.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_072.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_073.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_074.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_075.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_076.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_077.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_078.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_079.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_080.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_081.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_082.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_083.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_084.mp4 filter=lfs diff=lfs merge=lfs -text 
+API_Transformers/messi/Clips_30s/messi_part_085.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_086.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_087.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_088.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_089.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_090.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_091.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_092.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_093.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_30s/messi_part_094.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_001.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_002.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_003.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_004.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_005.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_006.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_007.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_008.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_009.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_010.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_011.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_012.mp4 filter=lfs diff=lfs merge=lfs -text 
+API_Transformers/messi/Clips_60s/messi_part_013.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_014.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_015.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_016.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_017.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_018.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_019.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_020.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_021.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_022.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_023.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_024.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_025.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_026.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_027.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_028.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_029.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_030.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_031.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_032.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_033.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_034.mp4 filter=lfs diff=lfs merge=lfs -text 
+API_Transformers/messi/Clips_60s/messi_part_035.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_036.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_037.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_038.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_039.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_040.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_041.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_042.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_043.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_044.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_045.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_046.mp4 filter=lfs diff=lfs merge=lfs -text +API_Transformers/messi/Clips_60s/messi_part_047.mp4 filter=lfs diff=lfs merge=lfs -text +Direct_Transformers/videos/sample1_raw.mp4 filter=lfs diff=lfs merge=lfs -text +Direct_Transformers/videos/sample1_rotated_180.mp4 filter=lfs diff=lfs merge=lfs -text +Direct_Transformers/videos/sample1_rotated_270.mp4 filter=lfs diff=lfs merge=lfs -text +Direct_Transformers/videos/sample1_rotated_90.mp4 filter=lfs diff=lfs merge=lfs -text +Direct_Transformers/videos/sample2.mp4 filter=lfs diff=lfs merge=lfs -text +vllm-deploy/MiniCPM-V-4-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +xywang/demo.jpeg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0000.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0001.jpg filter=lfs diff=lfs merge=lfs -text 
+xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0002.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0003.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0004.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0005.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0006.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0007.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0008.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0009.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0010.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0011.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0012.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0013.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0014.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0015.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0016.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0017.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0018.jpg filter=lfs diff=lfs merge=lfs -text 
+xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0019.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0020.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0021.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0022.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0023.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0024.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0025.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0026.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0027.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0028.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0029.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0030.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0031.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0032.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0033.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0034.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0035.jpg filter=lfs diff=lfs merge=lfs -text 
+xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0036.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0037.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0038.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0039.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0040.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0041.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0042.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0043.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0044.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0045.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0046.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0047.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0048.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0049.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0050.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0051.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0052.jpg filter=lfs diff=lfs merge=lfs -text 
+xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0053.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0054.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0055.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0056.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0057.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0058.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0059.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0060.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0061.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0062.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0063.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0064.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0065.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0066.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0067.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0068.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0069.jpg filter=lfs diff=lfs merge=lfs -text 
+xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0070.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0071.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0072.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0073.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0074.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0075.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0076.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0077.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0078.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0079.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0080.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0081.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0082.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0083.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0084.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0085.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0086.jpg filter=lfs diff=lfs merge=lfs -text 
+xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0087.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0088.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0089.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0090.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0091.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0092.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0093.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0094.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0095.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0096.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0097.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0098.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0099.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0100.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0101.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0102.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0103.jpg filter=lfs diff=lfs merge=lfs -text 
+xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0104.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0105.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0106.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0107.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0108.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0109.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0110.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0111.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0112.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0113.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0114.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0115.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7/frame_0116.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/98f6980c-b652-4f5f-afef-18c971d62ac7.mp4 filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0000.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0001.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0002.jpg filter=lfs diff=lfs merge=lfs -text 
+xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0003.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0004.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0005.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0006.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0007.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0008.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0009.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0010.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0011.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0012.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0013.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0014.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0015.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0016.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0017.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0018.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0019.jpg filter=lfs diff=lfs merge=lfs -text 
+xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0020.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0021.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0022.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0023.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0024.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0025.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0026.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0027.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0028.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0029.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0030.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0031.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0032.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0033.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0034.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0035.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0036.jpg filter=lfs diff=lfs merge=lfs -text 
+xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0037.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0038.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0039.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0040.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0041.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0042.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0043.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0044.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0045.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0046.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0047.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0048.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0049.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0050.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0051.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0052.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0053.jpg filter=lfs diff=lfs merge=lfs -text 
+xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0054.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0055.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0056.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0057.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0058.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0059.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0060.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0061.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0062.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0063.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0064.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0065.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0066.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0067.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0068.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0069.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0070.jpg filter=lfs diff=lfs merge=lfs -text 
+xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0071.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0072.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0073.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0074.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0075.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0076.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0077.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0078.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0079.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0080.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0081.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0082.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0083.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0084.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0085.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0086.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0087.jpg filter=lfs diff=lfs merge=lfs -text 
+xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0088.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0089.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0090.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0091.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0092.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0093.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0094.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0095.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0096.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0097.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0098.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0099.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0100.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0101.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0102.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0103.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0104.jpg filter=lfs diff=lfs merge=lfs -text 
+xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0105.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0106.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0107.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0108.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0109.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0110.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0111.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0112.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0113.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0114.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0115.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811/frame_0116.jpg filter=lfs diff=lfs merge=lfs -text +xywang/infer/temp_videos/fd1d9099-d6d6-4b0d-b9d1-4bddd67d9811.mp4 filter=lfs diff=lfs merge=lfs -text +xywang/test_videos/cosplay.mp4 filter=lfs diff=lfs merge=lfs -text +xywang/test_videos/duoduo.mp4 filter=lfs diff=lfs merge=lfs -text +xywang/test_videos/fireworks.mp4 filter=lfs diff=lfs merge=lfs -text +xywang/test_videos/interview.mp4 filter=lfs diff=lfs merge=lfs -text +xywang/test_videos/moon.mp4 filter=lfs diff=lfs merge=lfs -text +xywang/test_videos/park.mp4 filter=lfs diff=lfs merge=lfs -text diff --git a/API_Transformers/__pycache__/video_processor.cpython-311.pyc 
b/API_Transformers/__pycache__/video_processor.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..39d6637414e64367cd0405a183fba9ee97600d65 Binary files /dev/null and b/API_Transformers/__pycache__/video_processor.cpython-311.pyc differ diff --git a/API_Transformers/cal.py b/API_Transformers/cal.py new file mode 100644 index 0000000000000000000000000000000000000000..7e7ca781fef78a06fba78d592c3ae27e3452b193 --- /dev/null +++ b/API_Transformers/cal.py @@ -0,0 +1,18 @@ +import json + +metric = { + "tokens_per_second": [], + "peak_gpu_memory_mb": [], + "num_generated_tokens": [], + "inference_time": [], + "cpu_usage": [], +} +for key, value in json.load(open("/mnt/data/xiuying/Code/local_deploy/outputs/mini/mini_60s.json")).items(): + metric["tokens_per_second"].append(value["tokens_per_second"]) + metric["peak_gpu_memory_mb"].append(value["peak_gpu_memory_mb"]) + metric["num_generated_tokens"].append(value["num_generated_tokens"]) + metric["inference_time"].append(value["inference_time"]) + metric["cpu_usage"].append(value["cpu_usage"]) + +for key, value in metric.items(): + print(key, sum(value) / len(value)) \ No newline at end of file diff --git a/API_Transformers/delete.py b/API_Transformers/delete.py new file mode 100644 index 0000000000000000000000000000000000000000..67fab8a3782bc75f2fc2421c8966cf1afcfba1f8 --- /dev/null +++ b/API_Transformers/delete.py @@ -0,0 +1,10 @@ +import os + +files = ["/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_017.mp4", +"/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_018.mp4", +"/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_019.mp4", +"/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_020.mp4"] + +for file in files: + os.remove(file) + print(f"Deleted {file}") \ No newline at end of file diff --git a/API_Transformers/infer.py b/API_Transformers/infer.py new file mode 100644 index 
0000000000000000000000000000000000000000..fc00d57a109ffbff4ed4b826d261b651b7ea7d0a --- /dev/null +++ b/API_Transformers/infer.py @@ -0,0 +1,131 @@ +import os +import uuid +import time +import psutil +import uvicorn +import torch +import cv2 +import shutil +from fastapi import FastAPI, File, UploadFile, Form, HTTPException +from fastapi.responses import JSONResponse +from models.qwen import Qwen2VL +from models.gemma import Gemma +from models.minicpm import MiniCPM +from models.lfm import LFM2 +from video_processor import extract_frames, FrameSamplingMethod +import argparse +import json +import logging + + + +parser = argparse.ArgumentParser() +parser.add_argument("--model_path", type=str, default="Qwen/Qwen2.5-VL-3B-Instruct-AWQ") +args = parser.parse_args() + + + +# --- 日志和临时文件目录配置 --- +LOG_DIR = f"logs/{args.model_path.split('/')[-1]}" +OUTPUT_DIR = f"outputs/{args.model_path.split('/')[-1]}" +TEMP_VIDEO_DIR = "temp_videos" +os.makedirs(LOG_DIR, exist_ok=True) +os.makedirs(OUTPUT_DIR, exist_ok=True) +os.makedirs(TEMP_VIDEO_DIR, exist_ok=True) +start_time = time.strftime('%Y%m%d_%H%M%S') +log_filename = f"{LOG_DIR}/{start_time}.log" +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S', filename=log_filename, filemode='a') + +# --- FastAPI 应用初始化 --- +app = FastAPI(title=f"{args.model_path} Video Inference Service") +total_output = {} +# --- 加载模型和处理器 --- +logging.info(f"Loading model: {args.model_path}") +model_load_start = time.time() +if "qwen" in args.model_path.lower(): + model = Qwen2VL(args.model_path) +elif "gemma" in args.model_path.lower(): + model = Gemma(args.model_path) +elif "minicpm" in args.model_path.lower(): + model = MiniCPM(args.model_path) +elif "lfm" in args.model_path.lower(): + model = LFM2(args.model_path) +model_load_end = time.time() +GPU_MEMORY_USAGE = f"{torch.cuda.memory_allocated(0)/1024**2:.2f} MB" if torch.cuda.is_available() else "N/A" +logging.info(f"Model loaded in 
{model_load_end - model_load_start:.2f} seconds") +logging.info(f"GPU Memory Usage after model load: {GPU_MEMORY_USAGE}") + +@app.post("/video-inference/") +async def video_inference( + prompt: str = Form(...), + video_file: str = Form(...), + sampling_method: FrameSamplingMethod = Form(FrameSamplingMethod.CONTENT_AWARE), + sampling_rate: int = Form(5), +): + """ + 接收视频和文本提示,进行推理并返回结果。 + """ + request_start_time = time.time() + request_id = str(uuid.uuid4()) + logging.info(f"[{request_id}] Received new video inference request. Prompt: '{prompt}', Video: '{video_file}'") + + if not video_file.endswith(".mp4"): + logging.error(f"[{request_id}] Uploaded file '{video_file}' is not a video.") + raise HTTPException(status_code=400, detail="Uploaded file is not a video.") + + file_extension = os.path.splitext(video_file)[1] + temp_video_path = os.path.join(TEMP_VIDEO_DIR, f"{request_id}{file_extension}") + temp_frame_dir = os.path.join(TEMP_VIDEO_DIR, request_id) + os.makedirs(temp_frame_dir, exist_ok=True) + + try: + + logging.info(f"[{request_id}] Video saved to temporary file: {temp_video_path}") + logging.info(f"[{request_id}] Extracting frames using method: {sampling_method.value}, rate/threshold: {sampling_rate}") + + frames = extract_frames(video_file, sampling_method, sampling_rate) + if not frames: + logging.error(f"[{request_id}] Could not extract any frames from the video: {temp_video_path}") + raise HTTPException(status_code=400, detail="Could not extract any frames from the video.") + + logging.info(f"[{request_id}] Extracted {len(frames)} frames successfully. 
Saving to temporary files...") + + # 将帧保存到临时文件并获取其路径 + frame_paths = [] + for i, frame in enumerate(frames): + frame_path = os.path.join(temp_frame_dir, f"frame_{i:04d}.jpg") + cv2.imwrite(frame_path, frame) + abs_frame_path = os.path.abspath(frame_path) + frame_paths.append(abs_frame_path) + + logging.info(f"[{request_id}] {len(frame_paths)} frames saved to {temp_frame_dir}") + + output = model.generate(frame_paths, prompt) + + logging.info(f"Tokens per second: {output['tokens_per_second']}, Peak GPU memory MB: {output['peak_gpu_memory_mb']}") + + inference_end_time = time.time() + cpu_usage = psutil.cpu_percent(interval=None) + cpu_core_utilization = psutil.cpu_percent(interval=None, percpu=True) + logging.info(f"[{request_id}] Inference time: {inference_end_time - request_start_time:.2f} seconds, CPU usage: {cpu_usage}%, CPU core utilization: {cpu_core_utilization}") + output["inference_time"] = inference_end_time - request_start_time + output["cpu_usage"] = cpu_usage + output["cpu_core_utilization"] = cpu_core_utilization + output["num_generated_tokens"] = output["num_generated_tokens"] + + return JSONResponse(content=output) + + except Exception as e: + logging.error(f"[{request_id}] An error occurred during processing: {str(e)}", exc_info=True) + raise HTTPException(status_code=500, detail=f"An error occurred during processing: {str(e)}") + finally: + if os.path.exists(temp_video_path): + os.remove(temp_video_path) + logging.info(f"[{request_id}] Cleaned up temporary file: {temp_video_path}") + if os.path.exists(temp_frame_dir): + shutil.rmtree(temp_frame_dir) + logging.info(f"[{request_id}] Cleaned up temporary frame directory: {temp_frame_dir}") + + +if __name__ == "__main__": + uvicorn.run(app, host="0.0.0.0", port=8010) diff --git a/API_Transformers/logs/LFM2-VL-1.6B/20250818_232556.log b/API_Transformers/logs/LFM2-VL-1.6B/20250818_232556.log new file mode 100644 index 0000000000000000000000000000000000000000..6654c177fb684724a31399275221eb8ff359929a --- 
/dev/null +++ b/API_Transformers/logs/LFM2-VL-1.6B/20250818_232556.log @@ -0,0 +1,14 @@ +2025-08-18 23:25:56 - INFO - Loading model: LiquidAI/LFM2-VL-1.6B +2025-08-18 23:25:58 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-18 23:26:26 - INFO - Model loaded in 29.54 seconds +2025-08-18 23:26:26 - INFO - GPU Memory Usage after model load: 3023.64 MB +2025-08-18 23:28:45 - INFO - [2d0a4e6b-87a3-4f80-9d2e-24b74787acdb] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 23:28:45 - INFO - [2d0a4e6b-87a3-4f80-9d2e-24b74787acdb] Video saved to temporary file: temp_videos/2d0a4e6b-87a3-4f80-9d2e-24b74787acdb.mp4 +2025-08-18 23:28:45 - INFO - [2d0a4e6b-87a3-4f80-9d2e-24b74787acdb] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:28:48 - INFO - [2d0a4e6b-87a3-4f80-9d2e-24b74787acdb] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:28:48 - INFO - [2d0a4e6b-87a3-4f80-9d2e-24b74787acdb] 30 frames saved to temp_videos/2d0a4e6b-87a3-4f80-9d2e-24b74787acdb +2025-08-18 23:28:48 - INFO - Prompt token length: 783 +2025-08-18 23:28:54 - INFO - Tokens per second: 28.289322629768442, Peak GPU memory MB: 4206.375 +2025-08-18 23:28:54 - INFO - [2d0a4e6b-87a3-4f80-9d2e-24b74787acdb] Inference time: 8.48 seconds, CPU usage: 22.5%, CPU core utilization: [21.5, 23.9, 21.6, 23.0] +2025-08-18 23:28:54 - INFO - [2d0a4e6b-87a3-4f80-9d2e-24b74787acdb] Cleaned up temporary file: temp_videos/2d0a4e6b-87a3-4f80-9d2e-24b74787acdb.mp4 +2025-08-18 23:28:54 - INFO - [2d0a4e6b-87a3-4f80-9d2e-24b74787acdb] Cleaned up temporary frame directory: temp_videos/2d0a4e6b-87a3-4f80-9d2e-24b74787acdb diff --git a/API_Transformers/logs/LFM2-VL-1.6B/20250818_233101.log b/API_Transformers/logs/LFM2-VL-1.6B/20250818_233101.log new file mode 100644 index 0000000000000000000000000000000000000000..4890a4059b45a4b5adf250aa370b74193a912f7c --- /dev/null +++ b/API_Transformers/logs/LFM2-VL-1.6B/20250818_233101.log @@ -0,0 +1,28 @@ +2025-08-18 23:31:01 - INFO - Loading model: LiquidAI/LFM2-VL-1.6B +2025-08-18 23:31:02 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-18 23:31:09 - INFO - Model loaded in 7.34 seconds +2025-08-18 23:31:09 - INFO - GPU Memory Usage after model load: 3023.64 MB +2025-08-18 23:31:56 - INFO - [653718a5-3216-4691-b041-10603df59fa5] Received new video inference request. 
Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 23:31:56 - INFO - [653718a5-3216-4691-b041-10603df59fa5] Video saved to temporary file: temp_videos/653718a5-3216-4691-b041-10603df59fa5.mp4 +2025-08-18 23:31:56 - INFO - [653718a5-3216-4691-b041-10603df59fa5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:32:01 - INFO - [653718a5-3216-4691-b041-10603df59fa5] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 23:32:01 - INFO - [653718a5-3216-4691-b041-10603df59fa5] 30 frames saved to temp_videos/653718a5-3216-4691-b041-10603df59fa5 +2025-08-18 23:32:01 - ERROR - [653718a5-3216-4691-b041-10603df59fa5] An error occurred during processing: Incorrect format used for image. Should be an url linking to an image, a base64 string, a local path, or a PIL image. +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/local_deploy/infer.py", line 107, in video_inference + output = model.generate(frame_paths, prompt) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/local_deploy/models/lfm.py", line 52, in generate + inputs = self.processor.apply_chat_template( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/utils/deprecation.py", line 172, in wrapped_func + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/processing_utils.py", line 1552, in apply_chat_template + images.append(load_image(fname)) + ^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/image_utils.py", line 493, in load_image + raise TypeError( +TypeError: Incorrect format used for image. Should be an url linking to an image, a base64 string, a local path, or a PIL image. 
+2025-08-18 23:32:01 - INFO - [653718a5-3216-4691-b041-10603df59fa5] Cleaned up temporary file: temp_videos/653718a5-3216-4691-b041-10603df59fa5.mp4 +2025-08-18 23:32:01 - INFO - [653718a5-3216-4691-b041-10603df59fa5] Cleaned up temporary frame directory: temp_videos/653718a5-3216-4691-b041-10603df59fa5 diff --git a/API_Transformers/logs/LFM2-VL-1.6B/20250818_233342.log b/API_Transformers/logs/LFM2-VL-1.6B/20250818_233342.log new file mode 100644 index 0000000000000000000000000000000000000000..2965cd0dc2ed9f91a93809e2217d97e4f4a8543e --- /dev/null +++ b/API_Transformers/logs/LFM2-VL-1.6B/20250818_233342.log @@ -0,0 +1,28 @@ +2025-08-18 23:33:42 - INFO - Loading model: LiquidAI/LFM2-VL-1.6B +2025-08-18 23:33:43 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-18 23:33:49 - INFO - Model loaded in 7.34 seconds +2025-08-18 23:33:49 - INFO - GPU Memory Usage after model load: 3023.64 MB +2025-08-18 23:33:54 - INFO - [c448d9a9-b14c-4a05-a6b6-67879d899cd1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 23:33:54 - INFO - [c448d9a9-b14c-4a05-a6b6-67879d899cd1] Video saved to temporary file: temp_videos/c448d9a9-b14c-4a05-a6b6-67879d899cd1.mp4 +2025-08-18 23:33:54 - INFO - [c448d9a9-b14c-4a05-a6b6-67879d899cd1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:33:57 - INFO - [c448d9a9-b14c-4a05-a6b6-67879d899cd1] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 23:33:57 - INFO - [c448d9a9-b14c-4a05-a6b6-67879d899cd1] 30 frames saved to temp_videos/c448d9a9-b14c-4a05-a6b6-67879d899cd1 +2025-08-18 23:33:57 - ERROR - [c448d9a9-b14c-4a05-a6b6-67879d899cd1] An error occurred during processing: Incorrect format used for image. Should be an url linking to an image, a base64 string, a local path, or a PIL image. 
+Traceback (most recent call last): + File "/mnt/data/xiuying/Code/local_deploy/infer.py", line 107, in video_inference + output = model.generate(frame_paths, prompt) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/local_deploy/models/lfm.py", line 52, in generate + inputs = self.processor.apply_chat_template( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/utils/deprecation.py", line 172, in wrapped_func + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/processing_utils.py", line 1552, in apply_chat_template + images.append(load_image(fname)) + ^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/image_utils.py", line 493, in load_image + raise TypeError( +TypeError: Incorrect format used for image. Should be an url linking to an image, a base64 string, a local path, or a PIL image. +2025-08-18 23:33:57 - INFO - [c448d9a9-b14c-4a05-a6b6-67879d899cd1] Cleaned up temporary file: temp_videos/c448d9a9-b14c-4a05-a6b6-67879d899cd1.mp4 +2025-08-18 23:33:57 - INFO - [c448d9a9-b14c-4a05-a6b6-67879d899cd1] Cleaned up temporary frame directory: temp_videos/c448d9a9-b14c-4a05-a6b6-67879d899cd1 diff --git a/API_Transformers/logs/LFM2-VL-1.6B/20250818_233635.log b/API_Transformers/logs/LFM2-VL-1.6B/20250818_233635.log new file mode 100644 index 0000000000000000000000000000000000000000..c7800a6c63adc3c66ff851737497673bed48c704 --- /dev/null +++ b/API_Transformers/logs/LFM2-VL-1.6B/20250818_233635.log @@ -0,0 +1,10 @@ +2025-08-18 23:36:35 - INFO - Loading model: LiquidAI/LFM2-VL-1.6B +2025-08-18 23:36:36 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). 
+2025-08-18 23:36:43 - INFO - Model loaded in 7.59 seconds +2025-08-18 23:36:43 - INFO - GPU Memory Usage after model load: 3023.64 MB +2025-08-18 23:36:48 - INFO - [d90238c8-2478-4c9d-bb69-a2535b9d8011] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 23:36:48 - INFO - [d90238c8-2478-4c9d-bb69-a2535b9d8011] Video saved to temporary file: temp_videos/d90238c8-2478-4c9d-bb69-a2535b9d8011.mp4 +2025-08-18 23:36:48 - INFO - [d90238c8-2478-4c9d-bb69-a2535b9d8011] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:36:53 - INFO - [d90238c8-2478-4c9d-bb69-a2535b9d8011] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 23:36:53 - INFO - [d90238c8-2478-4c9d-bb69-a2535b9d8011] 30 frames saved to temp_videos/d90238c8-2478-4c9d-bb69-a2535b9d8011 +2025-08-18 23:36:56 - INFO - Prompt token length: 23084 diff --git a/API_Transformers/logs/LFM2-VL-1.6B/20250818_234120.log b/API_Transformers/logs/LFM2-VL-1.6B/20250818_234120.log new file mode 100644 index 0000000000000000000000000000000000000000..62b766a2e730f79f122f25cf2900d8790da73257 --- /dev/null +++ b/API_Transformers/logs/LFM2-VL-1.6B/20250818_234120.log @@ -0,0 +1,34 @@ +2025-08-18 23:41:20 - INFO - Loading model: LiquidAI/LFM2-VL-1.6B +2025-08-18 23:41:21 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-18 23:41:27 - INFO - Model loaded in 7.18 seconds +2025-08-18 23:41:27 - INFO - GPU Memory Usage after model load: 3023.64 MB +2025-08-18 23:41:34 - INFO - [5e9460f4-1fa1-4b3e-94b6-34725b6dc288] Received new video inference request. 
Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 23:41:34 - INFO - [5e9460f4-1fa1-4b3e-94b6-34725b6dc288] Video saved to temporary file: temp_videos/5e9460f4-1fa1-4b3e-94b6-34725b6dc288.mp4 +2025-08-18 23:41:34 - INFO - [5e9460f4-1fa1-4b3e-94b6-34725b6dc288] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:41:38 - INFO - [5e9460f4-1fa1-4b3e-94b6-34725b6dc288] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 23:41:38 - INFO - [5e9460f4-1fa1-4b3e-94b6-34725b6dc288] 30 frames saved to temp_videos/5e9460f4-1fa1-4b3e-94b6-34725b6dc288 +2025-08-18 23:41:39 - INFO - Prompt token length: 3584 +2025-08-18 23:41:55 - INFO - Tokens per second: 4.9186034292897505, Peak GPU memory MB: 9376.375 +2025-08-18 23:41:55 - INFO - [5e9460f4-1fa1-4b3e-94b6-34725b6dc288] Inference time: 21.78 seconds, CPU usage: 65.6%, CPU core utilization: [62.8, 62.3, 67.5, 69.8] +2025-08-18 23:41:55 - INFO - [5e9460f4-1fa1-4b3e-94b6-34725b6dc288] Cleaned up temporary file: temp_videos/5e9460f4-1fa1-4b3e-94b6-34725b6dc288.mp4 +2025-08-18 23:41:55 - INFO - [5e9460f4-1fa1-4b3e-94b6-34725b6dc288] Cleaned up temporary frame directory: temp_videos/5e9460f4-1fa1-4b3e-94b6-34725b6dc288 +2025-08-18 23:44:19 - INFO - [02d3e7ea-134b-413e-a2e6-d99b6e2130b1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 23:44:19 - INFO - [02d3e7ea-134b-413e-a2e6-d99b6e2130b1] Video saved to temporary file: temp_videos/02d3e7ea-134b-413e-a2e6-d99b6e2130b1.mp4 +2025-08-18 23:44:19 - INFO - [02d3e7ea-134b-413e-a2e6-d99b6e2130b1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:44:22 - INFO - [02d3e7ea-134b-413e-a2e6-d99b6e2130b1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:44:22 - INFO - [02d3e7ea-134b-413e-a2e6-d99b6e2130b1] 30 frames saved to temp_videos/02d3e7ea-134b-413e-a2e6-d99b6e2130b1 +2025-08-18 23:44:22 - INFO - Prompt token length: 3584 +2025-08-18 23:44:39 - INFO - Tokens per second: 4.978868742700231, Peak GPU memory MB: 9376.375 +2025-08-18 23:44:39 - INFO - [02d3e7ea-134b-413e-a2e6-d99b6e2130b1] Inference time: 20.31 seconds, CPU usage: 53.2%, CPU core utilization: [52.9, 53.5, 55.2, 51.1] +2025-08-18 23:44:39 - INFO - [02d3e7ea-134b-413e-a2e6-d99b6e2130b1] Cleaned up temporary file: temp_videos/02d3e7ea-134b-413e-a2e6-d99b6e2130b1.mp4 +2025-08-18 23:44:39 - INFO - [02d3e7ea-134b-413e-a2e6-d99b6e2130b1] Cleaned up temporary frame directory: temp_videos/02d3e7ea-134b-413e-a2e6-d99b6e2130b1 +2025-08-18 23:45:04 - INFO - [7f8e9ce9-aad6-4a0e-8e5c-2a5a8931110b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 23:45:04 - INFO - [7f8e9ce9-aad6-4a0e-8e5c-2a5a8931110b] Video saved to temporary file: temp_videos/7f8e9ce9-aad6-4a0e-8e5c-2a5a8931110b.mp4 +2025-08-18 23:45:04 - INFO - [7f8e9ce9-aad6-4a0e-8e5c-2a5a8931110b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:45:09 - INFO - [7f8e9ce9-aad6-4a0e-8e5c-2a5a8931110b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:45:09 - INFO - [7f8e9ce9-aad6-4a0e-8e5c-2a5a8931110b] 30 frames saved to temp_videos/7f8e9ce9-aad6-4a0e-8e5c-2a5a8931110b +2025-08-18 23:45:09 - INFO - Prompt token length: 3584 +2025-08-18 23:45:26 - INFO - Tokens per second: 4.974079275567591, Peak GPU memory MB: 9376.375 +2025-08-18 23:45:26 - INFO - [7f8e9ce9-aad6-4a0e-8e5c-2a5a8931110b] Inference time: 21.92 seconds, CPU usage: 60.2%, CPU core utilization: [60.4, 56.6, 56.5, 67.3] +2025-08-18 23:45:26 - INFO - [7f8e9ce9-aad6-4a0e-8e5c-2a5a8931110b] Cleaned up temporary file: temp_videos/7f8e9ce9-aad6-4a0e-8e5c-2a5a8931110b.mp4 +2025-08-18 23:45:26 - INFO - [7f8e9ce9-aad6-4a0e-8e5c-2a5a8931110b] Cleaned up temporary frame directory: temp_videos/7f8e9ce9-aad6-4a0e-8e5c-2a5a8931110b diff --git a/API_Transformers/logs/LFM2-VL-1.6B/20250818_234837.log b/API_Transformers/logs/LFM2-VL-1.6B/20250818_234837.log new file mode 100644 index 0000000000000000000000000000000000000000..01a91c2f87cde80324e267a0edd15ca4ff18f6be --- /dev/null +++ b/API_Transformers/logs/LFM2-VL-1.6B/20250818_234837.log @@ -0,0 +1,14 @@ +2025-08-18 23:48:37 - INFO - Loading model: LiquidAI/LFM2-VL-1.6B +2025-08-18 23:48:38 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-18 23:48:44 - INFO - Model loaded in 7.31 seconds +2025-08-18 23:48:44 - INFO - GPU Memory Usage after model load: 3023.64 MB +2025-08-18 23:49:06 - INFO - [20d53a50-ffe8-4d54-94e1-cd4a287c9be8] Received new video inference request. 
Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 23:49:06 - INFO - [20d53a50-ffe8-4d54-94e1-cd4a287c9be8] Video saved to temporary file: temp_videos/20d53a50-ffe8-4d54-94e1-cd4a287c9be8.mp4 +2025-08-18 23:49:06 - INFO - [20d53a50-ffe8-4d54-94e1-cd4a287c9be8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:49:09 - INFO - [20d53a50-ffe8-4d54-94e1-cd4a287c9be8] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 23:49:09 - INFO - [20d53a50-ffe8-4d54-94e1-cd4a287c9be8] 30 frames saved to temp_videos/20d53a50-ffe8-4d54-94e1-cd4a287c9be8 +2025-08-18 23:49:10 - INFO - Prompt token length: 3584 +2025-08-18 23:49:27 - INFO - Tokens per second: 34.94049134256706, Peak GPU memory MB: 9376.375 +2025-08-18 23:49:27 - INFO - [20d53a50-ffe8-4d54-94e1-cd4a287c9be8] Inference time: 20.83 seconds, CPU usage: 63.8%, CPU core utilization: [60.4, 62.1, 69.4, 63.2] +2025-08-18 23:49:27 - INFO - [20d53a50-ffe8-4d54-94e1-cd4a287c9be8] Cleaned up temporary file: temp_videos/20d53a50-ffe8-4d54-94e1-cd4a287c9be8.mp4 +2025-08-18 23:49:27 - INFO - [20d53a50-ffe8-4d54-94e1-cd4a287c9be8] Cleaned up temporary frame directory: temp_videos/20d53a50-ffe8-4d54-94e1-cd4a287c9be8 diff --git a/API_Transformers/logs/LFM2-VL-1.6B/20250818_234946.log b/API_Transformers/logs/LFM2-VL-1.6B/20250818_234946.log new file mode 100644 index 0000000000000000000000000000000000000000..7849b67b4a0974f62f55fbaf646538d2a2941251 --- /dev/null +++ b/API_Transformers/logs/LFM2-VL-1.6B/20250818_234946.log @@ -0,0 +1,1414 @@ +2025-08-18 23:49:46 - INFO - Loading model: LiquidAI/LFM2-VL-1.6B +2025-08-18 23:49:48 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). 
+2025-08-18 23:49:54 - INFO - Model loaded in 7.69 seconds +2025-08-18 23:49:54 - INFO - GPU Memory Usage after model load: 3023.64 MB +2025-08-18 23:50:07 - INFO - [5b85d146-bffa-4d71-9f4d-1cadc376ace1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 23:50:07 - INFO - [5b85d146-bffa-4d71-9f4d-1cadc376ace1] Video saved to temporary file: temp_videos/5b85d146-bffa-4d71-9f4d-1cadc376ace1.mp4 +2025-08-18 23:50:07 - INFO - [5b85d146-bffa-4d71-9f4d-1cadc376ace1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:50:11 - INFO - [5b85d146-bffa-4d71-9f4d-1cadc376ace1] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 23:50:11 - INFO - [5b85d146-bffa-4d71-9f4d-1cadc376ace1] 30 frames saved to temp_videos/5b85d146-bffa-4d71-9f4d-1cadc376ace1 +2025-08-18 23:50:11 - INFO - Prompt token length: 3584 +2025-08-18 23:50:28 - INFO - Tokens per second: 39.115405931589194, Peak GPU memory MB: 9376.375 +2025-08-18 23:50:28 - INFO - [5b85d146-bffa-4d71-9f4d-1cadc376ace1] Inference time: 20.54 seconds, CPU usage: 66.5%, CPU core utilization: [64.7, 59.3, 65.1, 76.8] +2025-08-18 23:50:28 - INFO - [5b85d146-bffa-4d71-9f4d-1cadc376ace1] Cleaned up temporary file: temp_videos/5b85d146-bffa-4d71-9f4d-1cadc376ace1.mp4 +2025-08-18 23:50:28 - INFO - [5b85d146-bffa-4d71-9f4d-1cadc376ace1] Cleaned up temporary frame directory: temp_videos/5b85d146-bffa-4d71-9f4d-1cadc376ace1 +2025-08-18 23:50:28 - INFO - [e4be9349-3e3e-444d-8ae6-ac443c37f4a7] Received new video inference request. 
Prompt: 'Please describe the video.', Video: 'messi_part_002.mp4' +2025-08-18 23:50:28 - INFO - [e4be9349-3e3e-444d-8ae6-ac443c37f4a7] Video saved to temporary file: temp_videos/e4be9349-3e3e-444d-8ae6-ac443c37f4a7.mp4 +2025-08-18 23:50:28 - INFO - [e4be9349-3e3e-444d-8ae6-ac443c37f4a7] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:50:37 - INFO - [e4be9349-3e3e-444d-8ae6-ac443c37f4a7] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 23:50:37 - INFO - [e4be9349-3e3e-444d-8ae6-ac443c37f4a7] 30 frames saved to temp_videos/e4be9349-3e3e-444d-8ae6-ac443c37f4a7 +2025-08-18 23:50:38 - INFO - Prompt token length: 3584 +2025-08-18 23:50:55 - INFO - Tokens per second: 41.680290716366436, Peak GPU memory MB: 9376.375 +2025-08-18 23:50:55 - INFO - [e4be9349-3e3e-444d-8ae6-ac443c37f4a7] Inference time: 26.45 seconds, CPU usage: 81.6%, CPU core utilization: [79.6, 82.3, 82.8, 81.6] +2025-08-18 23:50:55 - INFO - [e4be9349-3e3e-444d-8ae6-ac443c37f4a7] Cleaned up temporary file: temp_videos/e4be9349-3e3e-444d-8ae6-ac443c37f4a7.mp4 +2025-08-18 23:50:55 - INFO - [e4be9349-3e3e-444d-8ae6-ac443c37f4a7] Cleaned up temporary frame directory: temp_videos/e4be9349-3e3e-444d-8ae6-ac443c37f4a7 +2025-08-18 23:50:55 - INFO - [491dbeca-2fe9-4af9-bfe2-a41fae17a378] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_003.mp4' +2025-08-18 23:50:55 - INFO - [491dbeca-2fe9-4af9-bfe2-a41fae17a378] Video saved to temporary file: temp_videos/491dbeca-2fe9-4af9-bfe2-a41fae17a378.mp4 +2025-08-18 23:50:55 - INFO - [491dbeca-2fe9-4af9-bfe2-a41fae17a378] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:51:02 - INFO - [491dbeca-2fe9-4af9-bfe2-a41fae17a378] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:51:02 - INFO - [491dbeca-2fe9-4af9-bfe2-a41fae17a378] 30 frames saved to temp_videos/491dbeca-2fe9-4af9-bfe2-a41fae17a378 +2025-08-18 23:51:02 - INFO - Prompt token length: 3584 +2025-08-18 23:51:21 - INFO - Tokens per second: 40.298898666280635, Peak GPU memory MB: 9376.375 +2025-08-18 23:51:21 - INFO - [491dbeca-2fe9-4af9-bfe2-a41fae17a378] Inference time: 26.21 seconds, CPU usage: 81.7%, CPU core utilization: [78.7, 80.5, 83.6, 83.9] +2025-08-18 23:51:21 - INFO - [491dbeca-2fe9-4af9-bfe2-a41fae17a378] Cleaned up temporary file: temp_videos/491dbeca-2fe9-4af9-bfe2-a41fae17a378.mp4 +2025-08-18 23:51:21 - INFO - [491dbeca-2fe9-4af9-bfe2-a41fae17a378] Cleaned up temporary frame directory: temp_videos/491dbeca-2fe9-4af9-bfe2-a41fae17a378 +2025-08-18 23:51:21 - INFO - [ae624dc9-ae3c-4a66-b0af-5e7bdd0016c6] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_004.mp4' +2025-08-18 23:51:21 - INFO - [ae624dc9-ae3c-4a66-b0af-5e7bdd0016c6] Video saved to temporary file: temp_videos/ae624dc9-ae3c-4a66-b0af-5e7bdd0016c6.mp4 +2025-08-18 23:51:21 - INFO - [ae624dc9-ae3c-4a66-b0af-5e7bdd0016c6] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:51:29 - INFO - [ae624dc9-ae3c-4a66-b0af-5e7bdd0016c6] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:51:29 - INFO - [ae624dc9-ae3c-4a66-b0af-5e7bdd0016c6] 30 frames saved to temp_videos/ae624dc9-ae3c-4a66-b0af-5e7bdd0016c6 +2025-08-18 23:51:29 - INFO - Prompt token length: 3584 +2025-08-18 23:51:46 - INFO - Tokens per second: 40.64591219302388, Peak GPU memory MB: 9376.375 +2025-08-18 23:51:46 - INFO - [ae624dc9-ae3c-4a66-b0af-5e7bdd0016c6] Inference time: 25.53 seconds, CPU usage: 77.0%, CPU core utilization: [74.3, 78.8, 81.3, 73.5] +2025-08-18 23:51:46 - INFO - [ae624dc9-ae3c-4a66-b0af-5e7bdd0016c6] Cleaned up temporary file: temp_videos/ae624dc9-ae3c-4a66-b0af-5e7bdd0016c6.mp4 +2025-08-18 23:51:46 - INFO - [ae624dc9-ae3c-4a66-b0af-5e7bdd0016c6] Cleaned up temporary frame directory: temp_videos/ae624dc9-ae3c-4a66-b0af-5e7bdd0016c6 +2025-08-18 23:51:46 - INFO - [fa4499ec-e2f9-406a-8244-3903201f6f87] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_005.mp4' +2025-08-18 23:51:46 - INFO - [fa4499ec-e2f9-406a-8244-3903201f6f87] Video saved to temporary file: temp_videos/fa4499ec-e2f9-406a-8244-3903201f6f87.mp4 +2025-08-18 23:51:46 - INFO - [fa4499ec-e2f9-406a-8244-3903201f6f87] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:51:55 - INFO - [fa4499ec-e2f9-406a-8244-3903201f6f87] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:51:55 - INFO - [fa4499ec-e2f9-406a-8244-3903201f6f87] 30 frames saved to temp_videos/fa4499ec-e2f9-406a-8244-3903201f6f87 +2025-08-18 23:51:55 - INFO - Prompt token length: 3584 +2025-08-18 23:52:13 - INFO - Tokens per second: 40.392638063278525, Peak GPU memory MB: 9376.375 +2025-08-18 23:52:13 - INFO - [fa4499ec-e2f9-406a-8244-3903201f6f87] Inference time: 26.59 seconds, CPU usage: 78.7%, CPU core utilization: [82.9, 76.9, 73.8, 81.1] +2025-08-18 23:52:13 - INFO - [fa4499ec-e2f9-406a-8244-3903201f6f87] Cleaned up temporary file: temp_videos/fa4499ec-e2f9-406a-8244-3903201f6f87.mp4 +2025-08-18 23:52:13 - INFO - [fa4499ec-e2f9-406a-8244-3903201f6f87] Cleaned up temporary frame directory: temp_videos/fa4499ec-e2f9-406a-8244-3903201f6f87 +2025-08-18 23:52:13 - INFO - [32783f23-25a0-44e1-b0af-56305229da2c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_006.mp4' +2025-08-18 23:52:13 - INFO - [32783f23-25a0-44e1-b0af-56305229da2c] Video saved to temporary file: temp_videos/32783f23-25a0-44e1-b0af-56305229da2c.mp4 +2025-08-18 23:52:13 - INFO - [32783f23-25a0-44e1-b0af-56305229da2c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:52:22 - INFO - [32783f23-25a0-44e1-b0af-56305229da2c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:52:22 - INFO - [32783f23-25a0-44e1-b0af-56305229da2c] 30 frames saved to temp_videos/32783f23-25a0-44e1-b0af-56305229da2c +2025-08-18 23:52:22 - INFO - Prompt token length: 3584 +2025-08-18 23:52:40 - INFO - Tokens per second: 39.74288596804158, Peak GPU memory MB: 9376.375 +2025-08-18 23:52:40 - INFO - [32783f23-25a0-44e1-b0af-56305229da2c] Inference time: 27.09 seconds, CPU usage: 78.7%, CPU core utilization: [75.1, 72.3, 82.9, 84.5] +2025-08-18 23:52:40 - INFO - [32783f23-25a0-44e1-b0af-56305229da2c] Cleaned up temporary file: temp_videos/32783f23-25a0-44e1-b0af-56305229da2c.mp4 +2025-08-18 23:52:40 - INFO - [32783f23-25a0-44e1-b0af-56305229da2c] Cleaned up temporary frame directory: temp_videos/32783f23-25a0-44e1-b0af-56305229da2c +2025-08-18 23:52:40 - INFO - [053bc21e-d02a-43eb-bf76-48cc75ee194d] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_007.mp4' +2025-08-18 23:52:40 - INFO - [053bc21e-d02a-43eb-bf76-48cc75ee194d] Video saved to temporary file: temp_videos/053bc21e-d02a-43eb-bf76-48cc75ee194d.mp4 +2025-08-18 23:52:40 - INFO - [053bc21e-d02a-43eb-bf76-48cc75ee194d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:52:49 - INFO - [053bc21e-d02a-43eb-bf76-48cc75ee194d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:52:49 - INFO - [053bc21e-d02a-43eb-bf76-48cc75ee194d] 30 frames saved to temp_videos/053bc21e-d02a-43eb-bf76-48cc75ee194d +2025-08-18 23:52:50 - INFO - Prompt token length: 3584 +2025-08-18 23:53:07 - INFO - Tokens per second: 41.13905098824316, Peak GPU memory MB: 9376.375 +2025-08-18 23:53:07 - INFO - [053bc21e-d02a-43eb-bf76-48cc75ee194d] Inference time: 26.66 seconds, CPU usage: 82.0%, CPU core utilization: [73.4, 81.4, 92.3, 80.9] +2025-08-18 23:53:07 - INFO - [053bc21e-d02a-43eb-bf76-48cc75ee194d] Cleaned up temporary file: temp_videos/053bc21e-d02a-43eb-bf76-48cc75ee194d.mp4 +2025-08-18 23:53:07 - INFO - [053bc21e-d02a-43eb-bf76-48cc75ee194d] Cleaned up temporary frame directory: temp_videos/053bc21e-d02a-43eb-bf76-48cc75ee194d +2025-08-18 23:53:07 - INFO - [85e10722-b00d-4634-b9b6-47562a83db92] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_008.mp4' +2025-08-18 23:53:07 - INFO - [85e10722-b00d-4634-b9b6-47562a83db92] Video saved to temporary file: temp_videos/85e10722-b00d-4634-b9b6-47562a83db92.mp4 +2025-08-18 23:53:07 - INFO - [85e10722-b00d-4634-b9b6-47562a83db92] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:53:14 - INFO - [85e10722-b00d-4634-b9b6-47562a83db92] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:53:14 - INFO - [85e10722-b00d-4634-b9b6-47562a83db92] 30 frames saved to temp_videos/85e10722-b00d-4634-b9b6-47562a83db92 +2025-08-18 23:53:14 - INFO - Prompt token length: 3584 +2025-08-18 23:53:32 - INFO - Tokens per second: 40.618019438790945, Peak GPU memory MB: 9376.375 +2025-08-18 23:53:32 - INFO - [85e10722-b00d-4634-b9b6-47562a83db92] Inference time: 25.06 seconds, CPU usage: 78.6%, CPU core utilization: [76.3, 76.6, 77.4, 84.2] +2025-08-18 23:53:32 - INFO - [85e10722-b00d-4634-b9b6-47562a83db92] Cleaned up temporary file: temp_videos/85e10722-b00d-4634-b9b6-47562a83db92.mp4 +2025-08-18 23:53:32 - INFO - [85e10722-b00d-4634-b9b6-47562a83db92] Cleaned up temporary frame directory: temp_videos/85e10722-b00d-4634-b9b6-47562a83db92 +2025-08-18 23:53:32 - INFO - [9ab2e59d-198f-464b-a002-556f22438f14] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_009.mp4' +2025-08-18 23:53:32 - INFO - [9ab2e59d-198f-464b-a002-556f22438f14] Video saved to temporary file: temp_videos/9ab2e59d-198f-464b-a002-556f22438f14.mp4 +2025-08-18 23:53:32 - INFO - [9ab2e59d-198f-464b-a002-556f22438f14] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:53:38 - INFO - [9ab2e59d-198f-464b-a002-556f22438f14] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:53:38 - INFO - [9ab2e59d-198f-464b-a002-556f22438f14] 30 frames saved to temp_videos/9ab2e59d-198f-464b-a002-556f22438f14 +2025-08-18 23:53:39 - INFO - Prompt token length: 3584 +2025-08-18 23:53:56 - INFO - Tokens per second: 40.50383986283677, Peak GPU memory MB: 9376.375 +2025-08-18 23:53:56 - INFO - [9ab2e59d-198f-464b-a002-556f22438f14] Inference time: 24.17 seconds, CPU usage: 79.8%, CPU core utilization: [80.5, 76.1, 81.0, 81.6] +2025-08-18 23:53:56 - INFO - [9ab2e59d-198f-464b-a002-556f22438f14] Cleaned up temporary file: temp_videos/9ab2e59d-198f-464b-a002-556f22438f14.mp4 +2025-08-18 23:53:56 - INFO - [9ab2e59d-198f-464b-a002-556f22438f14] Cleaned up temporary frame directory: temp_videos/9ab2e59d-198f-464b-a002-556f22438f14 +2025-08-18 23:53:56 - INFO - [c7e8e335-61bb-4b6c-8ec6-242d64217f34] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_010.mp4' +2025-08-18 23:53:56 - INFO - [c7e8e335-61bb-4b6c-8ec6-242d64217f34] Video saved to temporary file: temp_videos/c7e8e335-61bb-4b6c-8ec6-242d64217f34.mp4 +2025-08-18 23:53:56 - INFO - [c7e8e335-61bb-4b6c-8ec6-242d64217f34] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:54:03 - INFO - [c7e8e335-61bb-4b6c-8ec6-242d64217f34] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:54:03 - INFO - [c7e8e335-61bb-4b6c-8ec6-242d64217f34] 30 frames saved to temp_videos/c7e8e335-61bb-4b6c-8ec6-242d64217f34 +2025-08-18 23:54:04 - INFO - Prompt token length: 3584 +2025-08-18 23:54:21 - INFO - Tokens per second: 38.89654933093534, Peak GPU memory MB: 9376.375 +2025-08-18 23:54:21 - INFO - [c7e8e335-61bb-4b6c-8ec6-242d64217f34] Inference time: 24.53 seconds, CPU usage: 78.8%, CPU core utilization: [72.9, 80.8, 84.5, 77.0] +2025-08-18 23:54:21 - INFO - [c7e8e335-61bb-4b6c-8ec6-242d64217f34] Cleaned up temporary file: temp_videos/c7e8e335-61bb-4b6c-8ec6-242d64217f34.mp4 +2025-08-18 23:54:21 - INFO - [c7e8e335-61bb-4b6c-8ec6-242d64217f34] Cleaned up temporary frame directory: temp_videos/c7e8e335-61bb-4b6c-8ec6-242d64217f34 +2025-08-18 23:54:21 - INFO - [49b2becb-b56a-4c77-a438-90950cf5a671] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_011.mp4' +2025-08-18 23:54:21 - INFO - [49b2becb-b56a-4c77-a438-90950cf5a671] Video saved to temporary file: temp_videos/49b2becb-b56a-4c77-a438-90950cf5a671.mp4 +2025-08-18 23:54:21 - INFO - [49b2becb-b56a-4c77-a438-90950cf5a671] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:54:28 - INFO - [49b2becb-b56a-4c77-a438-90950cf5a671] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:54:28 - INFO - [49b2becb-b56a-4c77-a438-90950cf5a671] 30 frames saved to temp_videos/49b2becb-b56a-4c77-a438-90950cf5a671 +2025-08-18 23:54:29 - INFO - Prompt token length: 3584 +2025-08-18 23:54:46 - INFO - Tokens per second: 39.596664910065556, Peak GPU memory MB: 9376.375 +2025-08-18 23:54:46 - INFO - [49b2becb-b56a-4c77-a438-90950cf5a671] Inference time: 25.26 seconds, CPU usage: 82.3%, CPU core utilization: [85.2, 80.6, 78.8, 84.8] +2025-08-18 23:54:46 - INFO - [49b2becb-b56a-4c77-a438-90950cf5a671] Cleaned up temporary file: temp_videos/49b2becb-b56a-4c77-a438-90950cf5a671.mp4 +2025-08-18 23:54:46 - INFO - [49b2becb-b56a-4c77-a438-90950cf5a671] Cleaned up temporary frame directory: temp_videos/49b2becb-b56a-4c77-a438-90950cf5a671 +2025-08-18 23:54:46 - INFO - [456dd4dc-57b2-4a69-b2ce-7c75962473f9] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_012.mp4' +2025-08-18 23:54:46 - INFO - [456dd4dc-57b2-4a69-b2ce-7c75962473f9] Video saved to temporary file: temp_videos/456dd4dc-57b2-4a69-b2ce-7c75962473f9.mp4 +2025-08-18 23:54:46 - INFO - [456dd4dc-57b2-4a69-b2ce-7c75962473f9] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:54:54 - INFO - [456dd4dc-57b2-4a69-b2ce-7c75962473f9] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:54:54 - INFO - [456dd4dc-57b2-4a69-b2ce-7c75962473f9] 30 frames saved to temp_videos/456dd4dc-57b2-4a69-b2ce-7c75962473f9 +2025-08-18 23:54:55 - INFO - Prompt token length: 3584 +2025-08-18 23:55:13 - INFO - Tokens per second: 37.65531289972925, Peak GPU memory MB: 9376.375 +2025-08-18 23:55:13 - INFO - [456dd4dc-57b2-4a69-b2ce-7c75962473f9] Inference time: 26.73 seconds, CPU usage: 81.6%, CPU core utilization: [77.8, 81.0, 87.1, 80.5] +2025-08-18 23:55:13 - INFO - [456dd4dc-57b2-4a69-b2ce-7c75962473f9] Cleaned up temporary file: temp_videos/456dd4dc-57b2-4a69-b2ce-7c75962473f9.mp4 +2025-08-18 23:55:13 - INFO - [456dd4dc-57b2-4a69-b2ce-7c75962473f9] Cleaned up temporary frame directory: temp_videos/456dd4dc-57b2-4a69-b2ce-7c75962473f9 +2025-08-18 23:55:13 - INFO - [32a5d257-89ba-428b-a57b-e112ee1b6d6c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_013.mp4' +2025-08-18 23:55:13 - INFO - [32a5d257-89ba-428b-a57b-e112ee1b6d6c] Video saved to temporary file: temp_videos/32a5d257-89ba-428b-a57b-e112ee1b6d6c.mp4 +2025-08-18 23:55:13 - INFO - [32a5d257-89ba-428b-a57b-e112ee1b6d6c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:55:22 - INFO - [32a5d257-89ba-428b-a57b-e112ee1b6d6c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:55:22 - INFO - [32a5d257-89ba-428b-a57b-e112ee1b6d6c] 30 frames saved to temp_videos/32a5d257-89ba-428b-a57b-e112ee1b6d6c +2025-08-18 23:55:22 - INFO - Prompt token length: 3584 +2025-08-18 23:55:40 - INFO - Tokens per second: 41.228258805708954, Peak GPU memory MB: 9376.375 +2025-08-18 23:55:40 - INFO - [32a5d257-89ba-428b-a57b-e112ee1b6d6c] Inference time: 26.69 seconds, CPU usage: 77.5%, CPU core utilization: [80.0, 75.9, 74.8, 79.5] +2025-08-18 23:55:40 - INFO - [32a5d257-89ba-428b-a57b-e112ee1b6d6c] Cleaned up temporary file: temp_videos/32a5d257-89ba-428b-a57b-e112ee1b6d6c.mp4 +2025-08-18 23:55:40 - INFO - [32a5d257-89ba-428b-a57b-e112ee1b6d6c] Cleaned up temporary frame directory: temp_videos/32a5d257-89ba-428b-a57b-e112ee1b6d6c +2025-08-18 23:55:40 - INFO - [58c62ee2-ee58-45e8-8112-7b73ac24b97b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_014.mp4' +2025-08-18 23:55:40 - INFO - [58c62ee2-ee58-45e8-8112-7b73ac24b97b] Video saved to temporary file: temp_videos/58c62ee2-ee58-45e8-8112-7b73ac24b97b.mp4 +2025-08-18 23:55:40 - INFO - [58c62ee2-ee58-45e8-8112-7b73ac24b97b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:55:48 - INFO - [58c62ee2-ee58-45e8-8112-7b73ac24b97b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:55:48 - INFO - [58c62ee2-ee58-45e8-8112-7b73ac24b97b] 30 frames saved to temp_videos/58c62ee2-ee58-45e8-8112-7b73ac24b97b +2025-08-18 23:55:48 - INFO - Prompt token length: 3584 +2025-08-18 23:56:05 - INFO - Tokens per second: 41.332802503833406, Peak GPU memory MB: 9376.375 +2025-08-18 23:56:05 - INFO - [58c62ee2-ee58-45e8-8112-7b73ac24b97b] Inference time: 25.00 seconds, CPU usage: 77.5%, CPU core utilization: [79.1, 64.7, 75.0, 91.1] +2025-08-18 23:56:05 - INFO - [58c62ee2-ee58-45e8-8112-7b73ac24b97b] Cleaned up temporary file: temp_videos/58c62ee2-ee58-45e8-8112-7b73ac24b97b.mp4 +2025-08-18 23:56:05 - INFO - [58c62ee2-ee58-45e8-8112-7b73ac24b97b] Cleaned up temporary frame directory: temp_videos/58c62ee2-ee58-45e8-8112-7b73ac24b97b +2025-08-18 23:56:05 - INFO - [6b87fc41-21f7-49bd-b94b-53e37f44a55b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_015.mp4' +2025-08-18 23:56:05 - INFO - [6b87fc41-21f7-49bd-b94b-53e37f44a55b] Video saved to temporary file: temp_videos/6b87fc41-21f7-49bd-b94b-53e37f44a55b.mp4 +2025-08-18 23:56:05 - INFO - [6b87fc41-21f7-49bd-b94b-53e37f44a55b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:56:14 - INFO - [6b87fc41-21f7-49bd-b94b-53e37f44a55b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:56:14 - INFO - [6b87fc41-21f7-49bd-b94b-53e37f44a55b] 30 frames saved to temp_videos/6b87fc41-21f7-49bd-b94b-53e37f44a55b +2025-08-18 23:56:14 - INFO - Prompt token length: 3584 +2025-08-18 23:56:32 - INFO - Tokens per second: 36.78989463023373, Peak GPU memory MB: 9376.375 +2025-08-18 23:56:32 - INFO - [6b87fc41-21f7-49bd-b94b-53e37f44a55b] Inference time: 26.94 seconds, CPU usage: 80.8%, CPU core utilization: [84.7, 70.3, 74.8, 93.2] +2025-08-18 23:56:32 - INFO - [6b87fc41-21f7-49bd-b94b-53e37f44a55b] Cleaned up temporary file: temp_videos/6b87fc41-21f7-49bd-b94b-53e37f44a55b.mp4 +2025-08-18 23:56:32 - INFO - [6b87fc41-21f7-49bd-b94b-53e37f44a55b] Cleaned up temporary frame directory: temp_videos/6b87fc41-21f7-49bd-b94b-53e37f44a55b +2025-08-18 23:56:32 - INFO - [c5c05ef2-ccb0-4476-b920-4faf8cf09dcc] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_016.mp4' +2025-08-18 23:56:32 - INFO - [c5c05ef2-ccb0-4476-b920-4faf8cf09dcc] Video saved to temporary file: temp_videos/c5c05ef2-ccb0-4476-b920-4faf8cf09dcc.mp4 +2025-08-18 23:56:32 - INFO - [c5c05ef2-ccb0-4476-b920-4faf8cf09dcc] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:56:38 - INFO - [c5c05ef2-ccb0-4476-b920-4faf8cf09dcc] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:56:38 - INFO - [c5c05ef2-ccb0-4476-b920-4faf8cf09dcc] 30 frames saved to temp_videos/c5c05ef2-ccb0-4476-b920-4faf8cf09dcc +2025-08-18 23:56:38 - INFO - Prompt token length: 3584 +2025-08-18 23:56:56 - INFO - Tokens per second: 40.060213000999134, Peak GPU memory MB: 9376.375 +2025-08-18 23:56:56 - INFO - [c5c05ef2-ccb0-4476-b920-4faf8cf09dcc] Inference time: 24.31 seconds, CPU usage: 75.3%, CPU core utilization: [72.0, 74.7, 80.4, 74.0] +2025-08-18 23:56:56 - INFO - [c5c05ef2-ccb0-4476-b920-4faf8cf09dcc] Cleaned up temporary file: temp_videos/c5c05ef2-ccb0-4476-b920-4faf8cf09dcc.mp4 +2025-08-18 23:56:56 - INFO - [c5c05ef2-ccb0-4476-b920-4faf8cf09dcc] Cleaned up temporary frame directory: temp_videos/c5c05ef2-ccb0-4476-b920-4faf8cf09dcc +2025-08-18 23:56:56 - INFO - [f4d1184d-df43-4b9a-852e-24b07020123e] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_017.mp4' +2025-08-18 23:56:56 - INFO - [f4d1184d-df43-4b9a-852e-24b07020123e] Video saved to temporary file: temp_videos/f4d1184d-df43-4b9a-852e-24b07020123e.mp4 +2025-08-18 23:56:56 - INFO - [f4d1184d-df43-4b9a-852e-24b07020123e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:57:04 - INFO - [f4d1184d-df43-4b9a-852e-24b07020123e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:57:04 - INFO - [f4d1184d-df43-4b9a-852e-24b07020123e] 30 frames saved to temp_videos/f4d1184d-df43-4b9a-852e-24b07020123e +2025-08-18 23:57:05 - INFO - Prompt token length: 3584 +2025-08-18 23:57:22 - INFO - Tokens per second: 36.93354278794345, Peak GPU memory MB: 9376.375 +2025-08-18 23:57:22 - INFO - [f4d1184d-df43-4b9a-852e-24b07020123e] Inference time: 26.13 seconds, CPU usage: 83.1%, CPU core utilization: [84.8, 76.8, 77.8, 93.1] +2025-08-18 23:57:22 - INFO - [f4d1184d-df43-4b9a-852e-24b07020123e] Cleaned up temporary file: temp_videos/f4d1184d-df43-4b9a-852e-24b07020123e.mp4 +2025-08-18 23:57:22 - INFO - [f4d1184d-df43-4b9a-852e-24b07020123e] Cleaned up temporary frame directory: temp_videos/f4d1184d-df43-4b9a-852e-24b07020123e +2025-08-18 23:57:22 - INFO - [ed52fa7a-0839-48cc-a74c-7c70c20eabbb] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_018.mp4' +2025-08-18 23:57:22 - INFO - [ed52fa7a-0839-48cc-a74c-7c70c20eabbb] Video saved to temporary file: temp_videos/ed52fa7a-0839-48cc-a74c-7c70c20eabbb.mp4 +2025-08-18 23:57:22 - INFO - [ed52fa7a-0839-48cc-a74c-7c70c20eabbb] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:57:29 - INFO - [ed52fa7a-0839-48cc-a74c-7c70c20eabbb] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:57:29 - INFO - [ed52fa7a-0839-48cc-a74c-7c70c20eabbb] 30 frames saved to temp_videos/ed52fa7a-0839-48cc-a74c-7c70c20eabbb +2025-08-18 23:57:30 - INFO - Prompt token length: 3584 +2025-08-18 23:57:47 - INFO - Tokens per second: 36.78504412759604, Peak GPU memory MB: 9376.375 +2025-08-18 23:57:47 - INFO - [ed52fa7a-0839-48cc-a74c-7c70c20eabbb] Inference time: 25.17 seconds, CPU usage: 79.4%, CPU core utilization: [76.1, 67.9, 81.4, 92.2] +2025-08-18 23:57:47 - INFO - [ed52fa7a-0839-48cc-a74c-7c70c20eabbb] Cleaned up temporary file: temp_videos/ed52fa7a-0839-48cc-a74c-7c70c20eabbb.mp4 +2025-08-18 23:57:47 - INFO - [ed52fa7a-0839-48cc-a74c-7c70c20eabbb] Cleaned up temporary frame directory: temp_videos/ed52fa7a-0839-48cc-a74c-7c70c20eabbb +2025-08-18 23:57:48 - INFO - [c3a69722-85bb-4db6-b468-93c711e557a6] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_019.mp4' +2025-08-18 23:57:48 - INFO - [c3a69722-85bb-4db6-b468-93c711e557a6] Video saved to temporary file: temp_videos/c3a69722-85bb-4db6-b468-93c711e557a6.mp4 +2025-08-18 23:57:48 - INFO - [c3a69722-85bb-4db6-b468-93c711e557a6] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:57:56 - INFO - [c3a69722-85bb-4db6-b468-93c711e557a6] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:57:56 - INFO - [c3a69722-85bb-4db6-b468-93c711e557a6] 30 frames saved to temp_videos/c3a69722-85bb-4db6-b468-93c711e557a6 +2025-08-18 23:57:56 - INFO - Prompt token length: 3584 +2025-08-18 23:58:13 - INFO - Tokens per second: 40.30796192039967, Peak GPU memory MB: 9376.375 +2025-08-18 23:58:13 - INFO - [c3a69722-85bb-4db6-b468-93c711e557a6] Inference time: 25.52 seconds, CPU usage: 81.2%, CPU core utilization: [84.0, 77.4, 78.2, 85.0] +2025-08-18 23:58:13 - INFO - [c3a69722-85bb-4db6-b468-93c711e557a6] Cleaned up temporary file: temp_videos/c3a69722-85bb-4db6-b468-93c711e557a6.mp4 +2025-08-18 23:58:13 - INFO - [c3a69722-85bb-4db6-b468-93c711e557a6] Cleaned up temporary frame directory: temp_videos/c3a69722-85bb-4db6-b468-93c711e557a6 +2025-08-18 23:58:13 - INFO - [9ba27944-0c7a-4458-848f-3e18f6cf5d1f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_020.mp4' +2025-08-18 23:58:13 - INFO - [9ba27944-0c7a-4458-848f-3e18f6cf5d1f] Video saved to temporary file: temp_videos/9ba27944-0c7a-4458-848f-3e18f6cf5d1f.mp4 +2025-08-18 23:58:13 - INFO - [9ba27944-0c7a-4458-848f-3e18f6cf5d1f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:58:20 - INFO - [9ba27944-0c7a-4458-848f-3e18f6cf5d1f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:58:20 - INFO - [9ba27944-0c7a-4458-848f-3e18f6cf5d1f] 30 frames saved to temp_videos/9ba27944-0c7a-4458-848f-3e18f6cf5d1f +2025-08-18 23:58:20 - INFO - Prompt token length: 3584 +2025-08-18 23:58:37 - INFO - Tokens per second: 38.74070756902105, Peak GPU memory MB: 9376.375 +2025-08-18 23:58:37 - INFO - [9ba27944-0c7a-4458-848f-3e18f6cf5d1f] Inference time: 24.27 seconds, CPU usage: 80.2%, CPU core utilization: [80.6, 69.2, 75.8, 95.3] +2025-08-18 23:58:37 - INFO - [9ba27944-0c7a-4458-848f-3e18f6cf5d1f] Cleaned up temporary file: temp_videos/9ba27944-0c7a-4458-848f-3e18f6cf5d1f.mp4 +2025-08-18 23:58:37 - INFO - [9ba27944-0c7a-4458-848f-3e18f6cf5d1f] Cleaned up temporary frame directory: temp_videos/9ba27944-0c7a-4458-848f-3e18f6cf5d1f +2025-08-18 23:58:37 - INFO - [0ddba018-22df-4eb0-b27e-fd30db4de720] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_021.mp4' +2025-08-18 23:58:37 - INFO - [0ddba018-22df-4eb0-b27e-fd30db4de720] Video saved to temporary file: temp_videos/0ddba018-22df-4eb0-b27e-fd30db4de720.mp4 +2025-08-18 23:58:37 - INFO - [0ddba018-22df-4eb0-b27e-fd30db4de720] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:58:45 - INFO - [0ddba018-22df-4eb0-b27e-fd30db4de720] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:58:45 - INFO - [0ddba018-22df-4eb0-b27e-fd30db4de720] 30 frames saved to temp_videos/0ddba018-22df-4eb0-b27e-fd30db4de720 +2025-08-18 23:58:45 - INFO - Prompt token length: 3584 +2025-08-18 23:59:02 - INFO - Tokens per second: 41.32099005919523, Peak GPU memory MB: 9376.375 +2025-08-18 23:59:02 - INFO - [0ddba018-22df-4eb0-b27e-fd30db4de720] Inference time: 25.02 seconds, CPU usage: 80.5%, CPU core utilization: [79.4, 80.7, 84.1, 77.7] +2025-08-18 23:59:02 - INFO - [0ddba018-22df-4eb0-b27e-fd30db4de720] Cleaned up temporary file: temp_videos/0ddba018-22df-4eb0-b27e-fd30db4de720.mp4 +2025-08-18 23:59:02 - INFO - [0ddba018-22df-4eb0-b27e-fd30db4de720] Cleaned up temporary frame directory: temp_videos/0ddba018-22df-4eb0-b27e-fd30db4de720 +2025-08-18 23:59:02 - INFO - [bfd342ca-0347-49c4-9a94-6552d76e19a0] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_022.mp4' +2025-08-18 23:59:02 - INFO - [bfd342ca-0347-49c4-9a94-6552d76e19a0] Video saved to temporary file: temp_videos/bfd342ca-0347-49c4-9a94-6552d76e19a0.mp4 +2025-08-18 23:59:02 - INFO - [bfd342ca-0347-49c4-9a94-6552d76e19a0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:59:09 - INFO - [bfd342ca-0347-49c4-9a94-6552d76e19a0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:59:09 - INFO - [bfd342ca-0347-49c4-9a94-6552d76e19a0] 30 frames saved to temp_videos/bfd342ca-0347-49c4-9a94-6552d76e19a0 +2025-08-18 23:59:10 - INFO - Prompt token length: 3584 +2025-08-18 23:59:27 - INFO - Tokens per second: 41.598046238624825, Peak GPU memory MB: 9376.375 +2025-08-18 23:59:27 - INFO - [bfd342ca-0347-49c4-9a94-6552d76e19a0] Inference time: 24.73 seconds, CPU usage: 82.0%, CPU core utilization: [85.4, 74.6, 76.6, 91.5] +2025-08-18 23:59:27 - INFO - [bfd342ca-0347-49c4-9a94-6552d76e19a0] Cleaned up temporary file: temp_videos/bfd342ca-0347-49c4-9a94-6552d76e19a0.mp4 +2025-08-18 23:59:27 - INFO - [bfd342ca-0347-49c4-9a94-6552d76e19a0] Cleaned up temporary frame directory: temp_videos/bfd342ca-0347-49c4-9a94-6552d76e19a0 +2025-08-18 23:59:27 - INFO - [d5c137c0-5dd7-4995-9c34-77af504ed64c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_023.mp4' +2025-08-18 23:59:27 - INFO - [d5c137c0-5dd7-4995-9c34-77af504ed64c] Video saved to temporary file: temp_videos/d5c137c0-5dd7-4995-9c34-77af504ed64c.mp4 +2025-08-18 23:59:27 - INFO - [d5c137c0-5dd7-4995-9c34-77af504ed64c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:59:35 - INFO - [d5c137c0-5dd7-4995-9c34-77af504ed64c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:59:35 - INFO - [d5c137c0-5dd7-4995-9c34-77af504ed64c] 30 frames saved to temp_videos/d5c137c0-5dd7-4995-9c34-77af504ed64c +2025-08-18 23:59:35 - INFO - Prompt token length: 3584 +2025-08-18 23:59:53 - INFO - Tokens per second: 40.963503525455884, Peak GPU memory MB: 9376.375 +2025-08-18 23:59:53 - INFO - [d5c137c0-5dd7-4995-9c34-77af504ed64c] Inference time: 25.89 seconds, CPU usage: 77.9%, CPU core utilization: [75.3, 79.5, 79.3, 77.6] +2025-08-18 23:59:53 - INFO - [d5c137c0-5dd7-4995-9c34-77af504ed64c] Cleaned up temporary file: temp_videos/d5c137c0-5dd7-4995-9c34-77af504ed64c.mp4 +2025-08-18 23:59:53 - INFO - [d5c137c0-5dd7-4995-9c34-77af504ed64c] Cleaned up temporary frame directory: temp_videos/d5c137c0-5dd7-4995-9c34-77af504ed64c +2025-08-18 23:59:53 - INFO - [cca4ad1a-6d1b-4fd2-b6e8-78efa4097226] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_024.mp4' +2025-08-18 23:59:53 - INFO - [cca4ad1a-6d1b-4fd2-b6e8-78efa4097226] Video saved to temporary file: temp_videos/cca4ad1a-6d1b-4fd2-b6e8-78efa4097226.mp4 +2025-08-18 23:59:53 - INFO - [cca4ad1a-6d1b-4fd2-b6e8-78efa4097226] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:00:00 - INFO - [cca4ad1a-6d1b-4fd2-b6e8-78efa4097226] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:00:00 - INFO - [cca4ad1a-6d1b-4fd2-b6e8-78efa4097226] 30 frames saved to temp_videos/cca4ad1a-6d1b-4fd2-b6e8-78efa4097226 +2025-08-19 00:00:01 - INFO - Prompt token length: 3584 +2025-08-19 00:00:19 - INFO - Tokens per second: 39.38300814479115, Peak GPU memory MB: 9376.375 +2025-08-19 00:00:19 - INFO - [cca4ad1a-6d1b-4fd2-b6e8-78efa4097226] Inference time: 25.45 seconds, CPU usage: 78.2%, CPU core utilization: [81.7, 80.8, 73.0, 77.4] +2025-08-19 00:00:19 - INFO - [cca4ad1a-6d1b-4fd2-b6e8-78efa4097226] Cleaned up temporary file: temp_videos/cca4ad1a-6d1b-4fd2-b6e8-78efa4097226.mp4 +2025-08-19 00:00:19 - INFO - [cca4ad1a-6d1b-4fd2-b6e8-78efa4097226] Cleaned up temporary frame directory: temp_videos/cca4ad1a-6d1b-4fd2-b6e8-78efa4097226 +2025-08-19 00:00:19 - INFO - [57cde0f2-e32d-45b8-88da-1c10a65908ea] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_025.mp4' +2025-08-19 00:00:19 - INFO - [57cde0f2-e32d-45b8-88da-1c10a65908ea] Video saved to temporary file: temp_videos/57cde0f2-e32d-45b8-88da-1c10a65908ea.mp4 +2025-08-19 00:00:19 - INFO - [57cde0f2-e32d-45b8-88da-1c10a65908ea] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:00:28 - INFO - [57cde0f2-e32d-45b8-88da-1c10a65908ea] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:00:28 - INFO - [57cde0f2-e32d-45b8-88da-1c10a65908ea] 30 frames saved to temp_videos/57cde0f2-e32d-45b8-88da-1c10a65908ea +2025-08-19 00:00:28 - INFO - Prompt token length: 3584 +2025-08-19 00:00:46 - INFO - Tokens per second: 40.238911139014256, Peak GPU memory MB: 9376.375 +2025-08-19 00:00:46 - INFO - [57cde0f2-e32d-45b8-88da-1c10a65908ea] Inference time: 26.96 seconds, CPU usage: 78.2%, CPU core utilization: [76.9, 69.2, 76.6, 89.9] +2025-08-19 00:00:46 - INFO - [57cde0f2-e32d-45b8-88da-1c10a65908ea] Cleaned up temporary file: temp_videos/57cde0f2-e32d-45b8-88da-1c10a65908ea.mp4 +2025-08-19 00:00:46 - INFO - [57cde0f2-e32d-45b8-88da-1c10a65908ea] Cleaned up temporary frame directory: temp_videos/57cde0f2-e32d-45b8-88da-1c10a65908ea +2025-08-19 00:00:46 - INFO - [4fa7d4e0-e86a-4210-a732-d56708578419] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_026.mp4' +2025-08-19 00:00:46 - INFO - [4fa7d4e0-e86a-4210-a732-d56708578419] Video saved to temporary file: temp_videos/4fa7d4e0-e86a-4210-a732-d56708578419.mp4 +2025-08-19 00:00:46 - INFO - [4fa7d4e0-e86a-4210-a732-d56708578419] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:00:54 - INFO - [4fa7d4e0-e86a-4210-a732-d56708578419] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:00:54 - INFO - [4fa7d4e0-e86a-4210-a732-d56708578419] 30 frames saved to temp_videos/4fa7d4e0-e86a-4210-a732-d56708578419 +2025-08-19 00:00:54 - INFO - Prompt token length: 3584 +2025-08-19 00:01:12 - INFO - Tokens per second: 40.17368557750544, Peak GPU memory MB: 9376.375 +2025-08-19 00:01:12 - INFO - [4fa7d4e0-e86a-4210-a732-d56708578419] Inference time: 25.94 seconds, CPU usage: 78.3%, CPU core utilization: [74.2, 81.6, 81.0, 76.3] +2025-08-19 00:01:12 - INFO - [4fa7d4e0-e86a-4210-a732-d56708578419] Cleaned up temporary file: temp_videos/4fa7d4e0-e86a-4210-a732-d56708578419.mp4 +2025-08-19 00:01:12 - INFO - [4fa7d4e0-e86a-4210-a732-d56708578419] Cleaned up temporary frame directory: temp_videos/4fa7d4e0-e86a-4210-a732-d56708578419 +2025-08-19 00:01:12 - INFO - [4b0256b7-7dda-4b1d-b1e3-107fd5502f33] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_027.mp4' +2025-08-19 00:01:12 - INFO - [4b0256b7-7dda-4b1d-b1e3-107fd5502f33] Video saved to temporary file: temp_videos/4b0256b7-7dda-4b1d-b1e3-107fd5502f33.mp4 +2025-08-19 00:01:12 - INFO - [4b0256b7-7dda-4b1d-b1e3-107fd5502f33] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:01:20 - INFO - [4b0256b7-7dda-4b1d-b1e3-107fd5502f33] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:01:20 - INFO - [4b0256b7-7dda-4b1d-b1e3-107fd5502f33] 30 frames saved to temp_videos/4b0256b7-7dda-4b1d-b1e3-107fd5502f33 +2025-08-19 00:01:20 - INFO - Prompt token length: 3584 +2025-08-19 00:01:38 - INFO - Tokens per second: 39.38549335675344, Peak GPU memory MB: 9376.375 +2025-08-19 00:01:38 - INFO - [4b0256b7-7dda-4b1d-b1e3-107fd5502f33] Inference time: 26.07 seconds, CPU usage: 78.8%, CPU core utilization: [80.6, 60.0, 77.3, 97.3] +2025-08-19 00:01:38 - INFO - [4b0256b7-7dda-4b1d-b1e3-107fd5502f33] Cleaned up temporary file: temp_videos/4b0256b7-7dda-4b1d-b1e3-107fd5502f33.mp4 +2025-08-19 00:01:38 - INFO - [4b0256b7-7dda-4b1d-b1e3-107fd5502f33] Cleaned up temporary frame directory: temp_videos/4b0256b7-7dda-4b1d-b1e3-107fd5502f33 +2025-08-19 00:01:38 - INFO - [daefaac9-786f-4174-831a-e62abe9645da] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_028.mp4' +2025-08-19 00:01:38 - INFO - [daefaac9-786f-4174-831a-e62abe9645da] Video saved to temporary file: temp_videos/daefaac9-786f-4174-831a-e62abe9645da.mp4 +2025-08-19 00:01:38 - INFO - [daefaac9-786f-4174-831a-e62abe9645da] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:01:46 - INFO - [daefaac9-786f-4174-831a-e62abe9645da] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:01:46 - INFO - [daefaac9-786f-4174-831a-e62abe9645da] 30 frames saved to temp_videos/daefaac9-786f-4174-831a-e62abe9645da +2025-08-19 00:01:46 - INFO - Prompt token length: 3584 +2025-08-19 00:02:04 - INFO - Tokens per second: 37.358557004221744, Peak GPU memory MB: 9376.375 +2025-08-19 00:02:04 - INFO - [daefaac9-786f-4174-831a-e62abe9645da] Inference time: 25.99 seconds, CPU usage: 81.2%, CPU core utilization: [82.6, 72.3, 80.2, 89.7] +2025-08-19 00:02:04 - INFO - [daefaac9-786f-4174-831a-e62abe9645da] Cleaned up temporary file: temp_videos/daefaac9-786f-4174-831a-e62abe9645da.mp4 +2025-08-19 00:02:04 - INFO - [daefaac9-786f-4174-831a-e62abe9645da] Cleaned up temporary frame directory: temp_videos/daefaac9-786f-4174-831a-e62abe9645da +2025-08-19 00:02:04 - INFO - [28340fa9-c0b9-4616-8e5a-abdea612caca] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_029.mp4' +2025-08-19 00:02:04 - INFO - [28340fa9-c0b9-4616-8e5a-abdea612caca] Video saved to temporary file: temp_videos/28340fa9-c0b9-4616-8e5a-abdea612caca.mp4 +2025-08-19 00:02:04 - INFO - [28340fa9-c0b9-4616-8e5a-abdea612caca] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:02:12 - INFO - [28340fa9-c0b9-4616-8e5a-abdea612caca] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:02:12 - INFO - [28340fa9-c0b9-4616-8e5a-abdea612caca] 30 frames saved to temp_videos/28340fa9-c0b9-4616-8e5a-abdea612caca +2025-08-19 00:02:12 - INFO - Prompt token length: 3584 +2025-08-19 00:02:30 - INFO - Tokens per second: 40.02365392147096, Peak GPU memory MB: 9376.375 +2025-08-19 00:02:30 - INFO - [28340fa9-c0b9-4616-8e5a-abdea612caca] Inference time: 25.66 seconds, CPU usage: 80.4%, CPU core utilization: [78.8, 83.4, 81.6, 77.7] +2025-08-19 00:02:30 - INFO - [28340fa9-c0b9-4616-8e5a-abdea612caca] Cleaned up temporary file: temp_videos/28340fa9-c0b9-4616-8e5a-abdea612caca.mp4 +2025-08-19 00:02:30 - INFO - [28340fa9-c0b9-4616-8e5a-abdea612caca] Cleaned up temporary frame directory: temp_videos/28340fa9-c0b9-4616-8e5a-abdea612caca +2025-08-19 00:02:30 - INFO - [a5806023-e539-4968-8df8-fd2c13edf754] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_030.mp4' +2025-08-19 00:02:30 - INFO - [a5806023-e539-4968-8df8-fd2c13edf754] Video saved to temporary file: temp_videos/a5806023-e539-4968-8df8-fd2c13edf754.mp4 +2025-08-19 00:02:30 - INFO - [a5806023-e539-4968-8df8-fd2c13edf754] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:02:38 - INFO - [a5806023-e539-4968-8df8-fd2c13edf754] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:02:39 - INFO - [a5806023-e539-4968-8df8-fd2c13edf754] 30 frames saved to temp_videos/a5806023-e539-4968-8df8-fd2c13edf754 +2025-08-19 00:02:39 - INFO - Prompt token length: 3584 +2025-08-19 00:02:56 - INFO - Tokens per second: 40.11205537436322, Peak GPU memory MB: 9376.375 +2025-08-19 00:02:56 - INFO - [a5806023-e539-4968-8df8-fd2c13edf754] Inference time: 26.14 seconds, CPU usage: 83.8%, CPU core utilization: [81.8, 83.3, 83.5, 86.6] +2025-08-19 00:02:56 - INFO - [a5806023-e539-4968-8df8-fd2c13edf754] Cleaned up temporary file: temp_videos/a5806023-e539-4968-8df8-fd2c13edf754.mp4 +2025-08-19 00:02:56 - INFO - [a5806023-e539-4968-8df8-fd2c13edf754] Cleaned up temporary frame directory: temp_videos/a5806023-e539-4968-8df8-fd2c13edf754 +2025-08-19 00:02:56 - INFO - [00457f66-44f2-4306-b6df-89fbec279f13] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_031.mp4' +2025-08-19 00:02:56 - INFO - [00457f66-44f2-4306-b6df-89fbec279f13] Video saved to temporary file: temp_videos/00457f66-44f2-4306-b6df-89fbec279f13.mp4 +2025-08-19 00:02:56 - INFO - [00457f66-44f2-4306-b6df-89fbec279f13] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:03:02 - INFO - [00457f66-44f2-4306-b6df-89fbec279f13] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:03:02 - INFO - [00457f66-44f2-4306-b6df-89fbec279f13] 30 frames saved to temp_videos/00457f66-44f2-4306-b6df-89fbec279f13 +2025-08-19 00:03:02 - INFO - Prompt token length: 3584 +2025-08-19 00:03:20 - INFO - Tokens per second: 43.732303463922946, Peak GPU memory MB: 9376.375 +2025-08-19 00:03:20 - INFO - [00457f66-44f2-4306-b6df-89fbec279f13] Inference time: 23.93 seconds, CPU usage: 47.0%, CPU core utilization: [37.9, 27.6, 39.6, 83.1] +2025-08-19 00:03:20 - INFO - [00457f66-44f2-4306-b6df-89fbec279f13] Cleaned up temporary file: temp_videos/00457f66-44f2-4306-b6df-89fbec279f13.mp4 +2025-08-19 00:03:20 - INFO - [00457f66-44f2-4306-b6df-89fbec279f13] Cleaned up temporary frame directory: temp_videos/00457f66-44f2-4306-b6df-89fbec279f13 +2025-08-19 00:03:20 - INFO - [80676197-5b9e-422d-aa2f-fe2b270a1d4a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_032.mp4' +2025-08-19 00:03:20 - INFO - [80676197-5b9e-422d-aa2f-fe2b270a1d4a] Video saved to temporary file: temp_videos/80676197-5b9e-422d-aa2f-fe2b270a1d4a.mp4 +2025-08-19 00:03:20 - INFO - [80676197-5b9e-422d-aa2f-fe2b270a1d4a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:03:25 - INFO - [80676197-5b9e-422d-aa2f-fe2b270a1d4a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:03:25 - INFO - [80676197-5b9e-422d-aa2f-fe2b270a1d4a] 30 frames saved to temp_videos/80676197-5b9e-422d-aa2f-fe2b270a1d4a +2025-08-19 00:03:26 - INFO - Prompt token length: 3584 +2025-08-19 00:03:43 - INFO - Tokens per second: 41.1306443391851, Peak GPU memory MB: 9376.375 +2025-08-19 00:03:43 - INFO - [80676197-5b9e-422d-aa2f-fe2b270a1d4a] Inference time: 22.85 seconds, CPU usage: 48.6%, CPU core utilization: [40.5, 38.9, 83.4, 31.3] +2025-08-19 00:03:43 - INFO - [80676197-5b9e-422d-aa2f-fe2b270a1d4a] Cleaned up temporary file: temp_videos/80676197-5b9e-422d-aa2f-fe2b270a1d4a.mp4 +2025-08-19 00:03:43 - INFO - [80676197-5b9e-422d-aa2f-fe2b270a1d4a] Cleaned up temporary frame directory: temp_videos/80676197-5b9e-422d-aa2f-fe2b270a1d4a +2025-08-19 00:03:43 - INFO - [c1da6087-e52a-4275-b20e-e8a3da10f9cc] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_033.mp4' +2025-08-19 00:03:43 - INFO - [c1da6087-e52a-4275-b20e-e8a3da10f9cc] Video saved to temporary file: temp_videos/c1da6087-e52a-4275-b20e-e8a3da10f9cc.mp4 +2025-08-19 00:03:43 - INFO - [c1da6087-e52a-4275-b20e-e8a3da10f9cc] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:03:51 - INFO - [c1da6087-e52a-4275-b20e-e8a3da10f9cc] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:03:51 - INFO - [c1da6087-e52a-4275-b20e-e8a3da10f9cc] 30 frames saved to temp_videos/c1da6087-e52a-4275-b20e-e8a3da10f9cc +2025-08-19 00:03:51 - INFO - Prompt token length: 3584 +2025-08-19 00:04:09 - INFO - Tokens per second: 39.2482211811343, Peak GPU memory MB: 9376.375 +2025-08-19 00:04:09 - INFO - [c1da6087-e52a-4275-b20e-e8a3da10f9cc] Inference time: 26.37 seconds, CPU usage: 79.9%, CPU core utilization: [70.7, 79.5, 91.0, 78.5] +2025-08-19 00:04:09 - INFO - [c1da6087-e52a-4275-b20e-e8a3da10f9cc] Cleaned up temporary file: temp_videos/c1da6087-e52a-4275-b20e-e8a3da10f9cc.mp4 +2025-08-19 00:04:09 - INFO - [c1da6087-e52a-4275-b20e-e8a3da10f9cc] Cleaned up temporary frame directory: temp_videos/c1da6087-e52a-4275-b20e-e8a3da10f9cc +2025-08-19 00:04:09 - INFO - [43b97ff6-b7c0-4ee6-ba9a-60d717f3cada] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_034.mp4' +2025-08-19 00:04:09 - INFO - [43b97ff6-b7c0-4ee6-ba9a-60d717f3cada] Video saved to temporary file: temp_videos/43b97ff6-b7c0-4ee6-ba9a-60d717f3cada.mp4 +2025-08-19 00:04:09 - INFO - [43b97ff6-b7c0-4ee6-ba9a-60d717f3cada] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:04:17 - INFO - [43b97ff6-b7c0-4ee6-ba9a-60d717f3cada] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:04:17 - INFO - [43b97ff6-b7c0-4ee6-ba9a-60d717f3cada] 30 frames saved to temp_videos/43b97ff6-b7c0-4ee6-ba9a-60d717f3cada +2025-08-19 00:04:17 - INFO - Prompt token length: 3584 +2025-08-19 00:04:35 - INFO - Tokens per second: 43.710601210811305, Peak GPU memory MB: 9376.375 +2025-08-19 00:04:35 - INFO - [43b97ff6-b7c0-4ee6-ba9a-60d717f3cada] Inference time: 25.70 seconds, CPU usage: 50.7%, CPU core utilization: [57.0, 41.9, 65.6, 38.4] +2025-08-19 00:04:35 - INFO - [43b97ff6-b7c0-4ee6-ba9a-60d717f3cada] Cleaned up temporary file: temp_videos/43b97ff6-b7c0-4ee6-ba9a-60d717f3cada.mp4 +2025-08-19 00:04:35 - INFO - [43b97ff6-b7c0-4ee6-ba9a-60d717f3cada] Cleaned up temporary frame directory: temp_videos/43b97ff6-b7c0-4ee6-ba9a-60d717f3cada +2025-08-19 00:04:35 - INFO - [c997afa8-679a-4327-aa64-45c188f280cd] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_035.mp4' +2025-08-19 00:04:35 - INFO - [c997afa8-679a-4327-aa64-45c188f280cd] Video saved to temporary file: temp_videos/c997afa8-679a-4327-aa64-45c188f280cd.mp4 +2025-08-19 00:04:35 - INFO - [c997afa8-679a-4327-aa64-45c188f280cd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:04:40 - INFO - [c997afa8-679a-4327-aa64-45c188f280cd] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:04:40 - INFO - [c997afa8-679a-4327-aa64-45c188f280cd] 30 frames saved to temp_videos/c997afa8-679a-4327-aa64-45c188f280cd +2025-08-19 00:04:40 - INFO - Prompt token length: 3584 +2025-08-19 00:04:56 - INFO - Tokens per second: 43.34082730242674, Peak GPU memory MB: 9376.375 +2025-08-19 00:04:56 - INFO - [c997afa8-679a-4327-aa64-45c188f280cd] Inference time: 21.51 seconds, CPU usage: 44.1%, CPU core utilization: [24.8, 29.9, 95.2, 26.4] +2025-08-19 00:04:56 - INFO - [c997afa8-679a-4327-aa64-45c188f280cd] Cleaned up temporary file: temp_videos/c997afa8-679a-4327-aa64-45c188f280cd.mp4 +2025-08-19 00:04:56 - INFO - [c997afa8-679a-4327-aa64-45c188f280cd] Cleaned up temporary frame directory: temp_videos/c997afa8-679a-4327-aa64-45c188f280cd +2025-08-19 00:04:56 - INFO - [c56130e4-b57c-458d-a920-541968a81cf3] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_036.mp4' +2025-08-19 00:04:56 - INFO - [c56130e4-b57c-458d-a920-541968a81cf3] Video saved to temporary file: temp_videos/c56130e4-b57c-458d-a920-541968a81cf3.mp4 +2025-08-19 00:04:56 - INFO - [c56130e4-b57c-458d-a920-541968a81cf3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:05:02 - INFO - [c56130e4-b57c-458d-a920-541968a81cf3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:05:02 - INFO - [c56130e4-b57c-458d-a920-541968a81cf3] 30 frames saved to temp_videos/c56130e4-b57c-458d-a920-541968a81cf3 +2025-08-19 00:05:02 - INFO - Prompt token length: 3584 +2025-08-19 00:05:19 - INFO - Tokens per second: 43.305425094512614, Peak GPU memory MB: 9376.375 +2025-08-19 00:05:19 - INFO - [c56130e4-b57c-458d-a920-541968a81cf3] Inference time: 23.02 seconds, CPU usage: 52.2%, CPU core utilization: [40.0, 54.3, 38.4, 75.8] +2025-08-19 00:05:19 - INFO - [c56130e4-b57c-458d-a920-541968a81cf3] Cleaned up temporary file: temp_videos/c56130e4-b57c-458d-a920-541968a81cf3.mp4 +2025-08-19 00:05:19 - INFO - [c56130e4-b57c-458d-a920-541968a81cf3] Cleaned up temporary frame directory: temp_videos/c56130e4-b57c-458d-a920-541968a81cf3 +2025-08-19 00:05:19 - INFO - [0dd552db-5b57-4603-8e45-7fe3961ba2c9] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_037.mp4' +2025-08-19 00:05:19 - INFO - [0dd552db-5b57-4603-8e45-7fe3961ba2c9] Video saved to temporary file: temp_videos/0dd552db-5b57-4603-8e45-7fe3961ba2c9.mp4 +2025-08-19 00:05:19 - INFO - [0dd552db-5b57-4603-8e45-7fe3961ba2c9] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:05:25 - INFO - [0dd552db-5b57-4603-8e45-7fe3961ba2c9] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:05:25 - INFO - [0dd552db-5b57-4603-8e45-7fe3961ba2c9] 30 frames saved to temp_videos/0dd552db-5b57-4603-8e45-7fe3961ba2c9 +2025-08-19 00:05:25 - INFO - Prompt token length: 3584 +2025-08-19 00:05:42 - INFO - Tokens per second: 43.2809732947507, Peak GPU memory MB: 9376.375 +2025-08-19 00:05:42 - INFO - [0dd552db-5b57-4603-8e45-7fe3961ba2c9] Inference time: 22.87 seconds, CPU usage: 57.7%, CPU core utilization: [43.8, 51.8, 47.4, 87.6] +2025-08-19 00:05:42 - INFO - [0dd552db-5b57-4603-8e45-7fe3961ba2c9] Cleaned up temporary file: temp_videos/0dd552db-5b57-4603-8e45-7fe3961ba2c9.mp4 +2025-08-19 00:05:42 - INFO - [0dd552db-5b57-4603-8e45-7fe3961ba2c9] Cleaned up temporary frame directory: temp_videos/0dd552db-5b57-4603-8e45-7fe3961ba2c9 +2025-08-19 00:05:42 - INFO - [989cb63a-cbe6-46a5-991d-86c35393ffc3] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_038.mp4' +2025-08-19 00:05:42 - INFO - [989cb63a-cbe6-46a5-991d-86c35393ffc3] Video saved to temporary file: temp_videos/989cb63a-cbe6-46a5-991d-86c35393ffc3.mp4 +2025-08-19 00:05:42 - INFO - [989cb63a-cbe6-46a5-991d-86c35393ffc3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:05:48 - INFO - [989cb63a-cbe6-46a5-991d-86c35393ffc3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:05:48 - INFO - [989cb63a-cbe6-46a5-991d-86c35393ffc3] 30 frames saved to temp_videos/989cb63a-cbe6-46a5-991d-86c35393ffc3 +2025-08-19 00:05:48 - INFO - Prompt token length: 3584 +2025-08-19 00:06:04 - INFO - Tokens per second: 44.07200758227364, Peak GPU memory MB: 9376.375 +2025-08-19 00:06:04 - INFO - [989cb63a-cbe6-46a5-991d-86c35393ffc3] Inference time: 21.80 seconds, CPU usage: 45.3%, CPU core utilization: [32.1, 53.0, 27.2, 68.6] +2025-08-19 00:06:04 - INFO - [989cb63a-cbe6-46a5-991d-86c35393ffc3] Cleaned up temporary file: temp_videos/989cb63a-cbe6-46a5-991d-86c35393ffc3.mp4 +2025-08-19 00:06:04 - INFO - [989cb63a-cbe6-46a5-991d-86c35393ffc3] Cleaned up temporary frame directory: temp_videos/989cb63a-cbe6-46a5-991d-86c35393ffc3 +2025-08-19 00:06:04 - INFO - [f9637ae3-e9b1-4708-afa3-d9381ffd5d20] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_039.mp4' +2025-08-19 00:06:04 - INFO - [f9637ae3-e9b1-4708-afa3-d9381ffd5d20] Video saved to temporary file: temp_videos/f9637ae3-e9b1-4708-afa3-d9381ffd5d20.mp4 +2025-08-19 00:06:04 - INFO - [f9637ae3-e9b1-4708-afa3-d9381ffd5d20] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:06:10 - INFO - [f9637ae3-e9b1-4708-afa3-d9381ffd5d20] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:06:10 - INFO - [f9637ae3-e9b1-4708-afa3-d9381ffd5d20] 30 frames saved to temp_videos/f9637ae3-e9b1-4708-afa3-d9381ffd5d20 +2025-08-19 00:06:10 - INFO - Prompt token length: 3584 +2025-08-19 00:06:28 - INFO - Tokens per second: 43.32052884810036, Peak GPU memory MB: 9376.375 +2025-08-19 00:06:28 - INFO - [f9637ae3-e9b1-4708-afa3-d9381ffd5d20] Inference time: 23.63 seconds, CPU usage: 45.4%, CPU core utilization: [36.6, 29.6, 80.9, 34.6] +2025-08-19 00:06:28 - INFO - [f9637ae3-e9b1-4708-afa3-d9381ffd5d20] Cleaned up temporary file: temp_videos/f9637ae3-e9b1-4708-afa3-d9381ffd5d20.mp4 +2025-08-19 00:06:28 - INFO - [f9637ae3-e9b1-4708-afa3-d9381ffd5d20] Cleaned up temporary frame directory: temp_videos/f9637ae3-e9b1-4708-afa3-d9381ffd5d20 +2025-08-19 00:06:28 - INFO - [11035153-96bf-4afb-beee-02dc4cdcf12c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_040.mp4' +2025-08-19 00:06:28 - INFO - [11035153-96bf-4afb-beee-02dc4cdcf12c] Video saved to temporary file: temp_videos/11035153-96bf-4afb-beee-02dc4cdcf12c.mp4 +2025-08-19 00:06:28 - INFO - [11035153-96bf-4afb-beee-02dc4cdcf12c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:06:33 - INFO - [11035153-96bf-4afb-beee-02dc4cdcf12c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:06:33 - INFO - [11035153-96bf-4afb-beee-02dc4cdcf12c] 30 frames saved to temp_videos/11035153-96bf-4afb-beee-02dc4cdcf12c +2025-08-19 00:06:33 - INFO - Prompt token length: 3584 +2025-08-19 00:06:50 - INFO - Tokens per second: 43.58098238207414, Peak GPU memory MB: 9376.375 +2025-08-19 00:06:50 - INFO - [11035153-96bf-4afb-beee-02dc4cdcf12c] Inference time: 22.50 seconds, CPU usage: 43.7%, CPU core utilization: [29.7, 54.7, 25.5, 64.7] +2025-08-19 00:06:50 - INFO - [11035153-96bf-4afb-beee-02dc4cdcf12c] Cleaned up temporary file: temp_videos/11035153-96bf-4afb-beee-02dc4cdcf12c.mp4 +2025-08-19 00:06:50 - INFO - [11035153-96bf-4afb-beee-02dc4cdcf12c] Cleaned up temporary frame directory: temp_videos/11035153-96bf-4afb-beee-02dc4cdcf12c +2025-08-19 00:06:50 - INFO - [94ded1d7-2604-432a-b9de-0a959b98de5f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_041.mp4' +2025-08-19 00:06:50 - INFO - [94ded1d7-2604-432a-b9de-0a959b98de5f] Video saved to temporary file: temp_videos/94ded1d7-2604-432a-b9de-0a959b98de5f.mp4 +2025-08-19 00:06:50 - INFO - [94ded1d7-2604-432a-b9de-0a959b98de5f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:06:55 - INFO - [94ded1d7-2604-432a-b9de-0a959b98de5f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:06:55 - INFO - [94ded1d7-2604-432a-b9de-0a959b98de5f] 30 frames saved to temp_videos/94ded1d7-2604-432a-b9de-0a959b98de5f +2025-08-19 00:06:55 - INFO - Prompt token length: 3584 +2025-08-19 00:07:13 - INFO - Tokens per second: 43.43359338437597, Peak GPU memory MB: 9376.375 +2025-08-19 00:07:13 - INFO - [94ded1d7-2604-432a-b9de-0a959b98de5f] Inference time: 22.66 seconds, CPU usage: 42.2%, CPU core utilization: [50.8, 51.4, 39.2, 27.5] +2025-08-19 00:07:13 - INFO - [94ded1d7-2604-432a-b9de-0a959b98de5f] Cleaned up temporary file: temp_videos/94ded1d7-2604-432a-b9de-0a959b98de5f.mp4 +2025-08-19 00:07:13 - INFO - [94ded1d7-2604-432a-b9de-0a959b98de5f] Cleaned up temporary frame directory: temp_videos/94ded1d7-2604-432a-b9de-0a959b98de5f +2025-08-19 00:07:13 - INFO - [04a5e841-cb46-43a2-8355-383a35e63763] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_042.mp4' +2025-08-19 00:07:13 - INFO - [04a5e841-cb46-43a2-8355-383a35e63763] Video saved to temporary file: temp_videos/04a5e841-cb46-43a2-8355-383a35e63763.mp4 +2025-08-19 00:07:13 - INFO - [04a5e841-cb46-43a2-8355-383a35e63763] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:07:19 - INFO - [04a5e841-cb46-43a2-8355-383a35e63763] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:07:19 - INFO - [04a5e841-cb46-43a2-8355-383a35e63763] 30 frames saved to temp_videos/04a5e841-cb46-43a2-8355-383a35e63763 +2025-08-19 00:07:19 - INFO - Prompt token length: 3584 +2025-08-19 00:07:37 - INFO - Tokens per second: 43.5451659695564, Peak GPU memory MB: 9376.375 +2025-08-19 00:07:37 - INFO - [04a5e841-cb46-43a2-8355-383a35e63763] Inference time: 23.55 seconds, CPU usage: 45.3%, CPU core utilization: [28.8, 62.1, 31.5, 58.8] +2025-08-19 00:07:37 - INFO - [04a5e841-cb46-43a2-8355-383a35e63763] Cleaned up temporary file: temp_videos/04a5e841-cb46-43a2-8355-383a35e63763.mp4 +2025-08-19 00:07:37 - INFO - [04a5e841-cb46-43a2-8355-383a35e63763] Cleaned up temporary frame directory: temp_videos/04a5e841-cb46-43a2-8355-383a35e63763 +2025-08-19 00:07:37 - INFO - [8e63cecf-39ae-4caf-8310-1519e7d7f95b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_043.mp4' +2025-08-19 00:07:37 - INFO - [8e63cecf-39ae-4caf-8310-1519e7d7f95b] Video saved to temporary file: temp_videos/8e63cecf-39ae-4caf-8310-1519e7d7f95b.mp4 +2025-08-19 00:07:37 - INFO - [8e63cecf-39ae-4caf-8310-1519e7d7f95b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:07:42 - INFO - [8e63cecf-39ae-4caf-8310-1519e7d7f95b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:07:42 - INFO - [8e63cecf-39ae-4caf-8310-1519e7d7f95b] 30 frames saved to temp_videos/8e63cecf-39ae-4caf-8310-1519e7d7f95b +2025-08-19 00:07:42 - INFO - Prompt token length: 3584 +2025-08-19 00:08:00 - INFO - Tokens per second: 43.72216137040779, Peak GPU memory MB: 9376.375 +2025-08-19 00:08:00 - INFO - [8e63cecf-39ae-4caf-8310-1519e7d7f95b] Inference time: 22.74 seconds, CPU usage: 45.6%, CPU core utilization: [38.4, 38.5, 74.5, 31.2] +2025-08-19 00:08:00 - INFO - [8e63cecf-39ae-4caf-8310-1519e7d7f95b] Cleaned up temporary file: temp_videos/8e63cecf-39ae-4caf-8310-1519e7d7f95b.mp4 +2025-08-19 00:08:00 - INFO - [8e63cecf-39ae-4caf-8310-1519e7d7f95b] Cleaned up temporary frame directory: temp_videos/8e63cecf-39ae-4caf-8310-1519e7d7f95b +2025-08-19 00:08:00 - INFO - [fcaedee9-352c-4494-bd92-a92bb3d1f792] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_044.mp4' +2025-08-19 00:08:00 - INFO - [fcaedee9-352c-4494-bd92-a92bb3d1f792] Video saved to temporary file: temp_videos/fcaedee9-352c-4494-bd92-a92bb3d1f792.mp4 +2025-08-19 00:08:00 - INFO - [fcaedee9-352c-4494-bd92-a92bb3d1f792] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:08:05 - INFO - [fcaedee9-352c-4494-bd92-a92bb3d1f792] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:08:05 - INFO - [fcaedee9-352c-4494-bd92-a92bb3d1f792] 30 frames saved to temp_videos/fcaedee9-352c-4494-bd92-a92bb3d1f792 +2025-08-19 00:08:05 - INFO - Prompt token length: 3584 +2025-08-19 00:08:22 - INFO - Tokens per second: 43.69812721151038, Peak GPU memory MB: 9376.375 +2025-08-19 00:08:22 - INFO - [fcaedee9-352c-4494-bd92-a92bb3d1f792] Inference time: 22.37 seconds, CPU usage: 43.8%, CPU core utilization: [24.1, 76.7, 32.2, 41.8] +2025-08-19 00:08:22 - INFO - [fcaedee9-352c-4494-bd92-a92bb3d1f792] Cleaned up temporary file: temp_videos/fcaedee9-352c-4494-bd92-a92bb3d1f792.mp4 +2025-08-19 00:08:22 - INFO - [fcaedee9-352c-4494-bd92-a92bb3d1f792] Cleaned up temporary frame directory: temp_videos/fcaedee9-352c-4494-bd92-a92bb3d1f792 +2025-08-19 00:08:22 - INFO - [40c5e9be-9369-439c-8294-0daf2ab110bd] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_045.mp4' +2025-08-19 00:08:22 - INFO - [40c5e9be-9369-439c-8294-0daf2ab110bd] Video saved to temporary file: temp_videos/40c5e9be-9369-439c-8294-0daf2ab110bd.mp4 +2025-08-19 00:08:22 - INFO - [40c5e9be-9369-439c-8294-0daf2ab110bd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:08:27 - INFO - [40c5e9be-9369-439c-8294-0daf2ab110bd] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:08:27 - INFO - [40c5e9be-9369-439c-8294-0daf2ab110bd] 30 frames saved to temp_videos/40c5e9be-9369-439c-8294-0daf2ab110bd +2025-08-19 00:08:27 - INFO - Prompt token length: 3584 +2025-08-19 00:08:45 - INFO - Tokens per second: 43.60305109768778, Peak GPU memory MB: 9376.375 +2025-08-19 00:08:45 - INFO - [40c5e9be-9369-439c-8294-0daf2ab110bd] Inference time: 22.69 seconds, CPU usage: 43.3%, CPU core utilization: [52.4, 45.1, 48.5, 27.3] +2025-08-19 00:08:45 - INFO - [40c5e9be-9369-439c-8294-0daf2ab110bd] Cleaned up temporary file: temp_videos/40c5e9be-9369-439c-8294-0daf2ab110bd.mp4 +2025-08-19 00:08:45 - INFO - [40c5e9be-9369-439c-8294-0daf2ab110bd] Cleaned up temporary frame directory: temp_videos/40c5e9be-9369-439c-8294-0daf2ab110bd +2025-08-19 00:08:45 - INFO - [e6cf5776-f8bb-4cdd-9a51-c780ccd99a15] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_046.mp4' +2025-08-19 00:08:45 - INFO - [e6cf5776-f8bb-4cdd-9a51-c780ccd99a15] Video saved to temporary file: temp_videos/e6cf5776-f8bb-4cdd-9a51-c780ccd99a15.mp4 +2025-08-19 00:08:45 - INFO - [e6cf5776-f8bb-4cdd-9a51-c780ccd99a15] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:08:50 - INFO - [e6cf5776-f8bb-4cdd-9a51-c780ccd99a15] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:08:50 - INFO - [e6cf5776-f8bb-4cdd-9a51-c780ccd99a15] 30 frames saved to temp_videos/e6cf5776-f8bb-4cdd-9a51-c780ccd99a15 +2025-08-19 00:08:51 - INFO - Prompt token length: 3584 +2025-08-19 00:09:08 - INFO - Tokens per second: 43.843882768658645, Peak GPU memory MB: 9376.375 +2025-08-19 00:09:08 - INFO - [e6cf5776-f8bb-4cdd-9a51-c780ccd99a15] Inference time: 22.80 seconds, CPU usage: 45.2%, CPU core utilization: [27.3, 27.9, 94.3, 31.3] +2025-08-19 00:09:08 - INFO - [e6cf5776-f8bb-4cdd-9a51-c780ccd99a15] Cleaned up temporary file: temp_videos/e6cf5776-f8bb-4cdd-9a51-c780ccd99a15.mp4 +2025-08-19 00:09:08 - INFO - [e6cf5776-f8bb-4cdd-9a51-c780ccd99a15] Cleaned up temporary frame directory: temp_videos/e6cf5776-f8bb-4cdd-9a51-c780ccd99a15 +2025-08-19 00:09:08 - INFO - [3dd3ee21-fec9-419f-a35f-04002380fa23] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_047.mp4' +2025-08-19 00:09:08 - INFO - [3dd3ee21-fec9-419f-a35f-04002380fa23] Video saved to temporary file: temp_videos/3dd3ee21-fec9-419f-a35f-04002380fa23.mp4 +2025-08-19 00:09:08 - INFO - [3dd3ee21-fec9-419f-a35f-04002380fa23] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:09:12 - INFO - [3dd3ee21-fec9-419f-a35f-04002380fa23] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:09:12 - INFO - [3dd3ee21-fec9-419f-a35f-04002380fa23] 30 frames saved to temp_videos/3dd3ee21-fec9-419f-a35f-04002380fa23 +2025-08-19 00:09:13 - INFO - Prompt token length: 3584 +2025-08-19 00:09:30 - INFO - Tokens per second: 43.798575227467595, Peak GPU memory MB: 9376.375 +2025-08-19 00:09:30 - INFO - [3dd3ee21-fec9-419f-a35f-04002380fa23] Inference time: 22.24 seconds, CPU usage: 44.4%, CPU core utilization: [44.4, 29.2, 75.3, 28.8] +2025-08-19 00:09:30 - INFO - [3dd3ee21-fec9-419f-a35f-04002380fa23] Cleaned up temporary file: temp_videos/3dd3ee21-fec9-419f-a35f-04002380fa23.mp4 +2025-08-19 00:09:30 - INFO - [3dd3ee21-fec9-419f-a35f-04002380fa23] Cleaned up temporary frame directory: temp_videos/3dd3ee21-fec9-419f-a35f-04002380fa23 +2025-08-19 00:09:30 - INFO - [c554e97f-ecbe-4a5c-bb36-a46df0df0311] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_048.mp4' +2025-08-19 00:09:30 - INFO - [c554e97f-ecbe-4a5c-bb36-a46df0df0311] Video saved to temporary file: temp_videos/c554e97f-ecbe-4a5c-bb36-a46df0df0311.mp4 +2025-08-19 00:09:30 - INFO - [c554e97f-ecbe-4a5c-bb36-a46df0df0311] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:09:35 - INFO - [c554e97f-ecbe-4a5c-bb36-a46df0df0311] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:09:35 - INFO - [c554e97f-ecbe-4a5c-bb36-a46df0df0311] 30 frames saved to temp_videos/c554e97f-ecbe-4a5c-bb36-a46df0df0311 +2025-08-19 00:09:35 - INFO - Prompt token length: 3584 +2025-08-19 00:09:52 - INFO - Tokens per second: 43.89402793517226, Peak GPU memory MB: 9376.375 +2025-08-19 00:09:52 - INFO - [c554e97f-ecbe-4a5c-bb36-a46df0df0311] Inference time: 22.37 seconds, CPU usage: 44.3%, CPU core utilization: [86.0, 31.9, 30.7, 28.4] +2025-08-19 00:09:52 - INFO - [c554e97f-ecbe-4a5c-bb36-a46df0df0311] Cleaned up temporary file: temp_videos/c554e97f-ecbe-4a5c-bb36-a46df0df0311.mp4 +2025-08-19 00:09:52 - INFO - [c554e97f-ecbe-4a5c-bb36-a46df0df0311] Cleaned up temporary frame directory: temp_videos/c554e97f-ecbe-4a5c-bb36-a46df0df0311 +2025-08-19 00:09:52 - INFO - [4ff18281-ea35-436b-a4e2-1c26cdb821ef] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_049.mp4' +2025-08-19 00:09:52 - INFO - [4ff18281-ea35-436b-a4e2-1c26cdb821ef] Video saved to temporary file: temp_videos/4ff18281-ea35-436b-a4e2-1c26cdb821ef.mp4 +2025-08-19 00:09:52 - INFO - [4ff18281-ea35-436b-a4e2-1c26cdb821ef] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:09:57 - INFO - [4ff18281-ea35-436b-a4e2-1c26cdb821ef] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:09:57 - INFO - [4ff18281-ea35-436b-a4e2-1c26cdb821ef] 30 frames saved to temp_videos/4ff18281-ea35-436b-a4e2-1c26cdb821ef +2025-08-19 00:09:57 - INFO - Prompt token length: 3584 +2025-08-19 00:10:15 - INFO - Tokens per second: 43.56379224518244, Peak GPU memory MB: 9376.375 +2025-08-19 00:10:15 - INFO - [4ff18281-ea35-436b-a4e2-1c26cdb821ef] Inference time: 22.53 seconds, CPU usage: 43.2%, CPU core utilization: [31.4, 85.1, 26.9, 29.2] +2025-08-19 00:10:15 - INFO - [4ff18281-ea35-436b-a4e2-1c26cdb821ef] Cleaned up temporary file: temp_videos/4ff18281-ea35-436b-a4e2-1c26cdb821ef.mp4 +2025-08-19 00:10:15 - INFO - [4ff18281-ea35-436b-a4e2-1c26cdb821ef] Cleaned up temporary frame directory: temp_videos/4ff18281-ea35-436b-a4e2-1c26cdb821ef +2025-08-19 00:10:15 - INFO - [dd0795f6-11a5-4a4d-ae78-46fbf1edf466] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_050.mp4' +2025-08-19 00:10:15 - INFO - [dd0795f6-11a5-4a4d-ae78-46fbf1edf466] Video saved to temporary file: temp_videos/dd0795f6-11a5-4a4d-ae78-46fbf1edf466.mp4 +2025-08-19 00:10:15 - INFO - [dd0795f6-11a5-4a4d-ae78-46fbf1edf466] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:10:20 - INFO - [dd0795f6-11a5-4a4d-ae78-46fbf1edf466] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:10:20 - INFO - [dd0795f6-11a5-4a4d-ae78-46fbf1edf466] 30 frames saved to temp_videos/dd0795f6-11a5-4a4d-ae78-46fbf1edf466 +2025-08-19 00:10:20 - INFO - Prompt token length: 3584 +2025-08-19 00:10:37 - INFO - Tokens per second: 43.61928425153152, Peak GPU memory MB: 9376.375 +2025-08-19 00:10:37 - INFO - [dd0795f6-11a5-4a4d-ae78-46fbf1edf466] Inference time: 22.56 seconds, CPU usage: 44.0%, CPU core utilization: [24.9, 65.1, 31.4, 54.7] +2025-08-19 00:10:37 - INFO - [dd0795f6-11a5-4a4d-ae78-46fbf1edf466] Cleaned up temporary file: temp_videos/dd0795f6-11a5-4a4d-ae78-46fbf1edf466.mp4 +2025-08-19 00:10:37 - INFO - [dd0795f6-11a5-4a4d-ae78-46fbf1edf466] Cleaned up temporary frame directory: temp_videos/dd0795f6-11a5-4a4d-ae78-46fbf1edf466 +2025-08-19 00:10:37 - INFO - [d61c8693-6567-4f02-b3bf-843af3d00ee0] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_051.mp4' +2025-08-19 00:10:37 - INFO - [d61c8693-6567-4f02-b3bf-843af3d00ee0] Video saved to temporary file: temp_videos/d61c8693-6567-4f02-b3bf-843af3d00ee0.mp4 +2025-08-19 00:10:37 - INFO - [d61c8693-6567-4f02-b3bf-843af3d00ee0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:10:43 - INFO - [d61c8693-6567-4f02-b3bf-843af3d00ee0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:10:43 - INFO - [d61c8693-6567-4f02-b3bf-843af3d00ee0] 30 frames saved to temp_videos/d61c8693-6567-4f02-b3bf-843af3d00ee0 +2025-08-19 00:10:43 - INFO - Prompt token length: 3584 +2025-08-19 00:11:01 - INFO - Tokens per second: 42.97388508982766, Peak GPU memory MB: 9376.375 +2025-08-19 00:11:01 - INFO - [d61c8693-6567-4f02-b3bf-843af3d00ee0] Inference time: 23.27 seconds, CPU usage: 44.6%, CPU core utilization: [52.0, 43.1, 56.1, 27.2] +2025-08-19 00:11:01 - INFO - [d61c8693-6567-4f02-b3bf-843af3d00ee0] Cleaned up temporary file: temp_videos/d61c8693-6567-4f02-b3bf-843af3d00ee0.mp4 +2025-08-19 00:11:01 - INFO - [d61c8693-6567-4f02-b3bf-843af3d00ee0] Cleaned up temporary frame directory: temp_videos/d61c8693-6567-4f02-b3bf-843af3d00ee0 +2025-08-19 00:11:01 - INFO - [7329add3-cb8c-46a7-aba5-65da3a0009f7] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_052.mp4' +2025-08-19 00:11:01 - INFO - [7329add3-cb8c-46a7-aba5-65da3a0009f7] Video saved to temporary file: temp_videos/7329add3-cb8c-46a7-aba5-65da3a0009f7.mp4 +2025-08-19 00:11:01 - INFO - [7329add3-cb8c-46a7-aba5-65da3a0009f7] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:11:05 - INFO - [7329add3-cb8c-46a7-aba5-65da3a0009f7] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:11:05 - INFO - [7329add3-cb8c-46a7-aba5-65da3a0009f7] 30 frames saved to temp_videos/7329add3-cb8c-46a7-aba5-65da3a0009f7 +2025-08-19 00:11:06 - INFO - Prompt token length: 3584 +2025-08-19 00:11:23 - INFO - Tokens per second: 43.800756217355975, Peak GPU memory MB: 9376.375 +2025-08-19 00:11:23 - INFO - [7329add3-cb8c-46a7-aba5-65da3a0009f7] Inference time: 22.56 seconds, CPU usage: 43.4%, CPU core utilization: [38.0, 28.5, 79.2, 27.9] +2025-08-19 00:11:23 - INFO - [7329add3-cb8c-46a7-aba5-65da3a0009f7] Cleaned up temporary file: temp_videos/7329add3-cb8c-46a7-aba5-65da3a0009f7.mp4 +2025-08-19 00:11:23 - INFO - [7329add3-cb8c-46a7-aba5-65da3a0009f7] Cleaned up temporary frame directory: temp_videos/7329add3-cb8c-46a7-aba5-65da3a0009f7 +2025-08-19 00:11:23 - INFO - [f642e0cd-9db3-44c7-b4f8-21be55ecab35] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_053.mp4' +2025-08-19 00:11:23 - INFO - [f642e0cd-9db3-44c7-b4f8-21be55ecab35] Video saved to temporary file: temp_videos/f642e0cd-9db3-44c7-b4f8-21be55ecab35.mp4 +2025-08-19 00:11:23 - INFO - [f642e0cd-9db3-44c7-b4f8-21be55ecab35] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:11:28 - INFO - [f642e0cd-9db3-44c7-b4f8-21be55ecab35] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:11:28 - INFO - [f642e0cd-9db3-44c7-b4f8-21be55ecab35] 30 frames saved to temp_videos/f642e0cd-9db3-44c7-b4f8-21be55ecab35 +2025-08-19 00:11:29 - INFO - Prompt token length: 3584 +2025-08-19 00:11:45 - INFO - Tokens per second: 43.63552451411287, Peak GPU memory MB: 9376.375 +2025-08-19 00:11:45 - INFO - [f642e0cd-9db3-44c7-b4f8-21be55ecab35] Inference time: 21.78 seconds, CPU usage: 44.8%, CPU core utilization: [64.1, 30.4, 56.3, 28.6] +2025-08-19 00:11:45 - INFO - [f642e0cd-9db3-44c7-b4f8-21be55ecab35] Cleaned up temporary file: temp_videos/f642e0cd-9db3-44c7-b4f8-21be55ecab35.mp4 +2025-08-19 00:11:45 - INFO - [f642e0cd-9db3-44c7-b4f8-21be55ecab35] Cleaned up temporary frame directory: temp_videos/f642e0cd-9db3-44c7-b4f8-21be55ecab35 +2025-08-19 00:11:45 - INFO - [8646fa98-335e-4c9d-827f-7059c54efc26] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_054.mp4' +2025-08-19 00:11:45 - INFO - [8646fa98-335e-4c9d-827f-7059c54efc26] Video saved to temporary file: temp_videos/8646fa98-335e-4c9d-827f-7059c54efc26.mp4 +2025-08-19 00:11:45 - INFO - [8646fa98-335e-4c9d-827f-7059c54efc26] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:11:50 - INFO - [8646fa98-335e-4c9d-827f-7059c54efc26] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:11:50 - INFO - [8646fa98-335e-4c9d-827f-7059c54efc26] 30 frames saved to temp_videos/8646fa98-335e-4c9d-827f-7059c54efc26 +2025-08-19 00:11:50 - INFO - Prompt token length: 3584 +2025-08-19 00:12:07 - INFO - Tokens per second: 43.636680260959196, Peak GPU memory MB: 9376.375 +2025-08-19 00:12:07 - INFO - [8646fa98-335e-4c9d-827f-7059c54efc26] Inference time: 22.28 seconds, CPU usage: 44.5%, CPU core utilization: [52.8, 60.9, 26.8, 37.5] +2025-08-19 00:12:07 - INFO - [8646fa98-335e-4c9d-827f-7059c54efc26] Cleaned up temporary file: temp_videos/8646fa98-335e-4c9d-827f-7059c54efc26.mp4 +2025-08-19 00:12:07 - INFO - [8646fa98-335e-4c9d-827f-7059c54efc26] Cleaned up temporary frame directory: temp_videos/8646fa98-335e-4c9d-827f-7059c54efc26 +2025-08-19 00:12:07 - INFO - [987ebc1c-98be-497a-949d-00cf6a8df72f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_055.mp4' +2025-08-19 00:12:08 - INFO - [987ebc1c-98be-497a-949d-00cf6a8df72f] Video saved to temporary file: temp_videos/987ebc1c-98be-497a-949d-00cf6a8df72f.mp4 +2025-08-19 00:12:08 - INFO - [987ebc1c-98be-497a-949d-00cf6a8df72f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:12:13 - INFO - [987ebc1c-98be-497a-949d-00cf6a8df72f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:12:13 - INFO - [987ebc1c-98be-497a-949d-00cf6a8df72f] 30 frames saved to temp_videos/987ebc1c-98be-497a-949d-00cf6a8df72f +2025-08-19 00:12:14 - INFO - Prompt token length: 3584 +2025-08-19 00:12:31 - INFO - Tokens per second: 43.44007709060384, Peak GPU memory MB: 9376.375 +2025-08-19 00:12:31 - INFO - [987ebc1c-98be-497a-949d-00cf6a8df72f] Inference time: 23.51 seconds, CPU usage: 45.3%, CPU core utilization: [30.5, 45.8, 29.1, 75.8] +2025-08-19 00:12:31 - INFO - [987ebc1c-98be-497a-949d-00cf6a8df72f] Cleaned up temporary file: temp_videos/987ebc1c-98be-497a-949d-00cf6a8df72f.mp4 +2025-08-19 00:12:31 - INFO - [987ebc1c-98be-497a-949d-00cf6a8df72f] Cleaned up temporary frame directory: temp_videos/987ebc1c-98be-497a-949d-00cf6a8df72f +2025-08-19 00:12:31 - INFO - [6ba94afb-5ade-4951-9439-61cb619a3eda] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_056.mp4' +2025-08-19 00:12:31 - INFO - [6ba94afb-5ade-4951-9439-61cb619a3eda] Video saved to temporary file: temp_videos/6ba94afb-5ade-4951-9439-61cb619a3eda.mp4 +2025-08-19 00:12:31 - INFO - [6ba94afb-5ade-4951-9439-61cb619a3eda] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:12:36 - INFO - [6ba94afb-5ade-4951-9439-61cb619a3eda] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:12:36 - INFO - [6ba94afb-5ade-4951-9439-61cb619a3eda] 30 frames saved to temp_videos/6ba94afb-5ade-4951-9439-61cb619a3eda +2025-08-19 00:12:36 - INFO - Prompt token length: 3584 +2025-08-19 00:12:54 - INFO - Tokens per second: 43.73466548585687, Peak GPU memory MB: 9376.375 +2025-08-19 00:12:54 - INFO - [6ba94afb-5ade-4951-9439-61cb619a3eda] Inference time: 22.74 seconds, CPU usage: 44.0%, CPU core utilization: [47.8, 31.9, 71.2, 24.9] +2025-08-19 00:12:54 - INFO - [6ba94afb-5ade-4951-9439-61cb619a3eda] Cleaned up temporary file: temp_videos/6ba94afb-5ade-4951-9439-61cb619a3eda.mp4 +2025-08-19 00:12:54 - INFO - [6ba94afb-5ade-4951-9439-61cb619a3eda] Cleaned up temporary frame directory: temp_videos/6ba94afb-5ade-4951-9439-61cb619a3eda +2025-08-19 00:12:54 - INFO - [98d349e5-5b6a-4d8b-b0d1-3838b7c76e7f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_057.mp4' +2025-08-19 00:12:54 - INFO - [98d349e5-5b6a-4d8b-b0d1-3838b7c76e7f] Video saved to temporary file: temp_videos/98d349e5-5b6a-4d8b-b0d1-3838b7c76e7f.mp4 +2025-08-19 00:12:54 - INFO - [98d349e5-5b6a-4d8b-b0d1-3838b7c76e7f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:12:59 - INFO - [98d349e5-5b6a-4d8b-b0d1-3838b7c76e7f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:12:59 - INFO - [98d349e5-5b6a-4d8b-b0d1-3838b7c76e7f] 30 frames saved to temp_videos/98d349e5-5b6a-4d8b-b0d1-3838b7c76e7f +2025-08-19 00:12:59 - INFO - Prompt token length: 3584 +2025-08-19 00:13:17 - INFO - Tokens per second: 43.44122225548033, Peak GPU memory MB: 9376.375 +2025-08-19 00:13:17 - INFO - [98d349e5-5b6a-4d8b-b0d1-3838b7c76e7f] Inference time: 23.45 seconds, CPU usage: 44.5%, CPU core utilization: [81.3, 36.1, 31.2, 29.1] +2025-08-19 00:13:17 - INFO - [98d349e5-5b6a-4d8b-b0d1-3838b7c76e7f] Cleaned up temporary file: temp_videos/98d349e5-5b6a-4d8b-b0d1-3838b7c76e7f.mp4 +2025-08-19 00:13:17 - INFO - [98d349e5-5b6a-4d8b-b0d1-3838b7c76e7f] Cleaned up temporary frame directory: temp_videos/98d349e5-5b6a-4d8b-b0d1-3838b7c76e7f +2025-08-19 00:13:17 - INFO - [08f3a71a-6079-4d50-9ac3-b854424c48ea] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_058.mp4' +2025-08-19 00:13:17 - INFO - [08f3a71a-6079-4d50-9ac3-b854424c48ea] Video saved to temporary file: temp_videos/08f3a71a-6079-4d50-9ac3-b854424c48ea.mp4 +2025-08-19 00:13:17 - INFO - [08f3a71a-6079-4d50-9ac3-b854424c48ea] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:13:23 - INFO - [08f3a71a-6079-4d50-9ac3-b854424c48ea] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:13:23 - INFO - [08f3a71a-6079-4d50-9ac3-b854424c48ea] 30 frames saved to temp_videos/08f3a71a-6079-4d50-9ac3-b854424c48ea +2025-08-19 00:13:23 - INFO - Prompt token length: 3584 +2025-08-19 00:13:41 - INFO - Tokens per second: 43.24753914054676, Peak GPU memory MB: 9376.375 +2025-08-19 00:13:41 - INFO - [08f3a71a-6079-4d50-9ac3-b854424c48ea] Inference time: 23.70 seconds, CPU usage: 44.8%, CPU core utilization: [29.3, 80.1, 29.0, 40.8] +2025-08-19 00:13:41 - INFO - [08f3a71a-6079-4d50-9ac3-b854424c48ea] Cleaned up temporary file: temp_videos/08f3a71a-6079-4d50-9ac3-b854424c48ea.mp4 +2025-08-19 00:13:41 - INFO - [08f3a71a-6079-4d50-9ac3-b854424c48ea] Cleaned up temporary frame directory: temp_videos/08f3a71a-6079-4d50-9ac3-b854424c48ea +2025-08-19 00:13:41 - INFO - [366cd035-ab49-4502-86de-6ac65a94003a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_059.mp4' +2025-08-19 00:13:41 - INFO - [366cd035-ab49-4502-86de-6ac65a94003a] Video saved to temporary file: temp_videos/366cd035-ab49-4502-86de-6ac65a94003a.mp4 +2025-08-19 00:13:41 - INFO - [366cd035-ab49-4502-86de-6ac65a94003a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:13:47 - INFO - [366cd035-ab49-4502-86de-6ac65a94003a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:13:47 - INFO - [366cd035-ab49-4502-86de-6ac65a94003a] 30 frames saved to temp_videos/366cd035-ab49-4502-86de-6ac65a94003a +2025-08-19 00:13:47 - INFO - Prompt token length: 3584 +2025-08-19 00:14:05 - INFO - Tokens per second: 43.41159506336397, Peak GPU memory MB: 9376.375 +2025-08-19 00:14:05 - INFO - [366cd035-ab49-4502-86de-6ac65a94003a] Inference time: 23.60 seconds, CPU usage: 45.4%, CPU core utilization: [41.3, 45.2, 28.8, 66.3] +2025-08-19 00:14:05 - INFO - [366cd035-ab49-4502-86de-6ac65a94003a] Cleaned up temporary file: temp_videos/366cd035-ab49-4502-86de-6ac65a94003a.mp4 +2025-08-19 00:14:05 - INFO - [366cd035-ab49-4502-86de-6ac65a94003a] Cleaned up temporary frame directory: temp_videos/366cd035-ab49-4502-86de-6ac65a94003a +2025-08-19 00:14:05 - INFO - [4de43ee3-0051-4cca-8bd8-a6890fa2829f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_060.mp4' +2025-08-19 00:14:05 - INFO - [4de43ee3-0051-4cca-8bd8-a6890fa2829f] Video saved to temporary file: temp_videos/4de43ee3-0051-4cca-8bd8-a6890fa2829f.mp4 +2025-08-19 00:14:05 - INFO - [4de43ee3-0051-4cca-8bd8-a6890fa2829f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:14:10 - INFO - [4de43ee3-0051-4cca-8bd8-a6890fa2829f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:14:10 - INFO - [4de43ee3-0051-4cca-8bd8-a6890fa2829f] 30 frames saved to temp_videos/4de43ee3-0051-4cca-8bd8-a6890fa2829f +2025-08-19 00:14:10 - INFO - Prompt token length: 3584 +2025-08-19 00:14:27 - INFO - Tokens per second: 42.664462930711395, Peak GPU memory MB: 9376.375 +2025-08-19 00:14:27 - INFO - [4de43ee3-0051-4cca-8bd8-a6890fa2829f] Inference time: 22.69 seconds, CPU usage: 45.6%, CPU core utilization: [51.6, 29.2, 67.2, 34.5] +2025-08-19 00:14:27 - INFO - [4de43ee3-0051-4cca-8bd8-a6890fa2829f] Cleaned up temporary file: temp_videos/4de43ee3-0051-4cca-8bd8-a6890fa2829f.mp4 +2025-08-19 00:14:27 - INFO - [4de43ee3-0051-4cca-8bd8-a6890fa2829f] Cleaned up temporary frame directory: temp_videos/4de43ee3-0051-4cca-8bd8-a6890fa2829f +2025-08-19 00:14:27 - INFO - [09052175-3b1e-46bb-822a-798635e6e32b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_061.mp4' +2025-08-19 00:14:27 - INFO - [09052175-3b1e-46bb-822a-798635e6e32b] Video saved to temporary file: temp_videos/09052175-3b1e-46bb-822a-798635e6e32b.mp4 +2025-08-19 00:14:27 - INFO - [09052175-3b1e-46bb-822a-798635e6e32b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:14:34 - INFO - [09052175-3b1e-46bb-822a-798635e6e32b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:14:34 - INFO - [09052175-3b1e-46bb-822a-798635e6e32b] 30 frames saved to temp_videos/09052175-3b1e-46bb-822a-798635e6e32b +2025-08-19 00:14:34 - INFO - Prompt token length: 3584 +2025-08-19 00:14:51 - INFO - Tokens per second: 43.51970989323823, Peak GPU memory MB: 9376.375 +2025-08-19 00:14:51 - INFO - [09052175-3b1e-46bb-822a-798635e6e32b] Inference time: 23.80 seconds, CPU usage: 47.8%, CPU core utilization: [49.2, 36.0, 68.3, 37.6] +2025-08-19 00:14:51 - INFO - [09052175-3b1e-46bb-822a-798635e6e32b] Cleaned up temporary file: temp_videos/09052175-3b1e-46bb-822a-798635e6e32b.mp4 +2025-08-19 00:14:51 - INFO - [09052175-3b1e-46bb-822a-798635e6e32b] Cleaned up temporary frame directory: temp_videos/09052175-3b1e-46bb-822a-798635e6e32b +2025-08-19 00:14:51 - INFO - [765a31c4-6b2f-4115-98a0-99024ed175a6] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_062.mp4' +2025-08-19 00:14:51 - INFO - [765a31c4-6b2f-4115-98a0-99024ed175a6] Video saved to temporary file: temp_videos/765a31c4-6b2f-4115-98a0-99024ed175a6.mp4 +2025-08-19 00:14:51 - INFO - [765a31c4-6b2f-4115-98a0-99024ed175a6] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:14:57 - INFO - [765a31c4-6b2f-4115-98a0-99024ed175a6] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:14:57 - INFO - [765a31c4-6b2f-4115-98a0-99024ed175a6] 30 frames saved to temp_videos/765a31c4-6b2f-4115-98a0-99024ed175a6 +2025-08-19 00:14:57 - INFO - Prompt token length: 3584 +2025-08-19 00:15:16 - INFO - Tokens per second: 43.540681114859616, Peak GPU memory MB: 9376.375 +2025-08-19 00:15:16 - INFO - [765a31c4-6b2f-4115-98a0-99024ed175a6] Inference time: 24.19 seconds, CPU usage: 43.9%, CPU core utilization: [47.8, 48.5, 45.2, 33.9] +2025-08-19 00:15:16 - INFO - [765a31c4-6b2f-4115-98a0-99024ed175a6] Cleaned up temporary file: temp_videos/765a31c4-6b2f-4115-98a0-99024ed175a6.mp4 +2025-08-19 00:15:16 - INFO - [765a31c4-6b2f-4115-98a0-99024ed175a6] Cleaned up temporary frame directory: temp_videos/765a31c4-6b2f-4115-98a0-99024ed175a6 +2025-08-19 00:15:16 - INFO - [abaa3eb3-cb19-4bf1-ad8a-0d2641cf0436] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_063.mp4' +2025-08-19 00:15:16 - INFO - [abaa3eb3-cb19-4bf1-ad8a-0d2641cf0436] Video saved to temporary file: temp_videos/abaa3eb3-cb19-4bf1-ad8a-0d2641cf0436.mp4 +2025-08-19 00:15:16 - INFO - [abaa3eb3-cb19-4bf1-ad8a-0d2641cf0436] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:15:21 - INFO - [abaa3eb3-cb19-4bf1-ad8a-0d2641cf0436] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:15:21 - INFO - [abaa3eb3-cb19-4bf1-ad8a-0d2641cf0436] 30 frames saved to temp_videos/abaa3eb3-cb19-4bf1-ad8a-0d2641cf0436 +2025-08-19 00:15:21 - INFO - Prompt token length: 3584 +2025-08-19 00:15:39 - INFO - Tokens per second: 37.796216146669394, Peak GPU memory MB: 9376.375 +2025-08-19 00:15:39 - INFO - [abaa3eb3-cb19-4bf1-ad8a-0d2641cf0436] Inference time: 23.91 seconds, CPU usage: 53.0%, CPU core utilization: [73.7, 42.0, 52.8, 43.7] +2025-08-19 00:15:39 - INFO - [abaa3eb3-cb19-4bf1-ad8a-0d2641cf0436] Cleaned up temporary file: temp_videos/abaa3eb3-cb19-4bf1-ad8a-0d2641cf0436.mp4 +2025-08-19 00:15:39 - INFO - [abaa3eb3-cb19-4bf1-ad8a-0d2641cf0436] Cleaned up temporary frame directory: temp_videos/abaa3eb3-cb19-4bf1-ad8a-0d2641cf0436 +2025-08-19 00:15:40 - INFO - [ea938f48-111b-40bd-ba5b-a1503f7e9c2f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_064.mp4' +2025-08-19 00:15:40 - INFO - [ea938f48-111b-40bd-ba5b-a1503f7e9c2f] Video saved to temporary file: temp_videos/ea938f48-111b-40bd-ba5b-a1503f7e9c2f.mp4 +2025-08-19 00:15:40 - INFO - [ea938f48-111b-40bd-ba5b-a1503f7e9c2f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:15:48 - INFO - [ea938f48-111b-40bd-ba5b-a1503f7e9c2f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:15:48 - INFO - [ea938f48-111b-40bd-ba5b-a1503f7e9c2f] 30 frames saved to temp_videos/ea938f48-111b-40bd-ba5b-a1503f7e9c2f +2025-08-19 00:15:48 - INFO - Prompt token length: 3584 +2025-08-19 00:16:05 - INFO - Tokens per second: 43.727275245614855, Peak GPU memory MB: 9376.375 +2025-08-19 00:16:05 - INFO - [ea938f48-111b-40bd-ba5b-a1503f7e9c2f] Inference time: 25.78 seconds, CPU usage: 52.1%, CPU core utilization: [57.7, 38.9, 74.6, 37.3] +2025-08-19 00:16:05 - INFO - [ea938f48-111b-40bd-ba5b-a1503f7e9c2f] Cleaned up temporary file: temp_videos/ea938f48-111b-40bd-ba5b-a1503f7e9c2f.mp4 +2025-08-19 00:16:05 - INFO - [ea938f48-111b-40bd-ba5b-a1503f7e9c2f] Cleaned up temporary frame directory: temp_videos/ea938f48-111b-40bd-ba5b-a1503f7e9c2f +2025-08-19 00:16:05 - INFO - [1bf8bb45-72f7-49ff-ae23-bfd3975b2825] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_065.mp4' +2025-08-19 00:16:05 - INFO - [1bf8bb45-72f7-49ff-ae23-bfd3975b2825] Video saved to temporary file: temp_videos/1bf8bb45-72f7-49ff-ae23-bfd3975b2825.mp4 +2025-08-19 00:16:05 - INFO - [1bf8bb45-72f7-49ff-ae23-bfd3975b2825] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:16:11 - INFO - [1bf8bb45-72f7-49ff-ae23-bfd3975b2825] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:16:11 - INFO - [1bf8bb45-72f7-49ff-ae23-bfd3975b2825] 30 frames saved to temp_videos/1bf8bb45-72f7-49ff-ae23-bfd3975b2825 +2025-08-19 00:16:12 - INFO - Prompt token length: 3584 +2025-08-19 00:16:30 - INFO - Tokens per second: 38.956429242334835, Peak GPU memory MB: 9376.375 +2025-08-19 00:16:30 - INFO - [1bf8bb45-72f7-49ff-ae23-bfd3975b2825] Inference time: 24.38 seconds, CPU usage: 69.1%, CPU core utilization: [65.4, 61.1, 64.1, 85.6] +2025-08-19 00:16:30 - INFO - [1bf8bb45-72f7-49ff-ae23-bfd3975b2825] Cleaned up temporary file: temp_videos/1bf8bb45-72f7-49ff-ae23-bfd3975b2825.mp4 +2025-08-19 00:16:30 - INFO - [1bf8bb45-72f7-49ff-ae23-bfd3975b2825] Cleaned up temporary frame directory: temp_videos/1bf8bb45-72f7-49ff-ae23-bfd3975b2825 +2025-08-19 00:16:30 - INFO - [a3157119-69c6-4037-bfba-d82e7ac83180] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_066.mp4' +2025-08-19 00:16:30 - INFO - [a3157119-69c6-4037-bfba-d82e7ac83180] Video saved to temporary file: temp_videos/a3157119-69c6-4037-bfba-d82e7ac83180.mp4 +2025-08-19 00:16:30 - INFO - [a3157119-69c6-4037-bfba-d82e7ac83180] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:16:38 - INFO - [a3157119-69c6-4037-bfba-d82e7ac83180] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:16:38 - INFO - [a3157119-69c6-4037-bfba-d82e7ac83180] 30 frames saved to temp_videos/a3157119-69c6-4037-bfba-d82e7ac83180 +2025-08-19 00:16:38 - INFO - Prompt token length: 3584 +2025-08-19 00:16:56 - INFO - Tokens per second: 43.58446168324715, Peak GPU memory MB: 9376.375 +2025-08-19 00:16:56 - INFO - [a3157119-69c6-4037-bfba-d82e7ac83180] Inference time: 25.85 seconds, CPU usage: 71.0%, CPU core utilization: [87.1, 64.6, 66.0, 66.6] +2025-08-19 00:16:56 - INFO - [a3157119-69c6-4037-bfba-d82e7ac83180] Cleaned up temporary file: temp_videos/a3157119-69c6-4037-bfba-d82e7ac83180.mp4 +2025-08-19 00:16:56 - INFO - [a3157119-69c6-4037-bfba-d82e7ac83180] Cleaned up temporary frame directory: temp_videos/a3157119-69c6-4037-bfba-d82e7ac83180 +2025-08-19 00:16:56 - INFO - [d46c2045-b622-411e-9577-bddbd3dbe913] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_067.mp4' +2025-08-19 00:16:56 - INFO - [d46c2045-b622-411e-9577-bddbd3dbe913] Video saved to temporary file: temp_videos/d46c2045-b622-411e-9577-bddbd3dbe913.mp4 +2025-08-19 00:16:56 - INFO - [d46c2045-b622-411e-9577-bddbd3dbe913] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:17:00 - INFO - [d46c2045-b622-411e-9577-bddbd3dbe913] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:17:00 - INFO - [d46c2045-b622-411e-9577-bddbd3dbe913] 30 frames saved to temp_videos/d46c2045-b622-411e-9577-bddbd3dbe913 +2025-08-19 00:17:01 - INFO - Prompt token length: 3584 +2025-08-19 00:17:17 - INFO - Tokens per second: 43.65934498479588, Peak GPU memory MB: 9376.375 +2025-08-19 00:17:17 - INFO - [d46c2045-b622-411e-9577-bddbd3dbe913] Inference time: 21.31 seconds, CPU usage: 56.9%, CPU core utilization: [47.3, 76.3, 47.3, 56.6] +2025-08-19 00:17:17 - INFO - [d46c2045-b622-411e-9577-bddbd3dbe913] Cleaned up temporary file: temp_videos/d46c2045-b622-411e-9577-bddbd3dbe913.mp4 +2025-08-19 00:17:17 - INFO - [d46c2045-b622-411e-9577-bddbd3dbe913] Cleaned up temporary frame directory: temp_videos/d46c2045-b622-411e-9577-bddbd3dbe913 +2025-08-19 00:17:17 - INFO - [c2b4b202-b382-4e12-aca6-32a385fd43ba] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_068.mp4' +2025-08-19 00:17:17 - INFO - [c2b4b202-b382-4e12-aca6-32a385fd43ba] Video saved to temporary file: temp_videos/c2b4b202-b382-4e12-aca6-32a385fd43ba.mp4 +2025-08-19 00:17:17 - INFO - [c2b4b202-b382-4e12-aca6-32a385fd43ba] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:17:22 - INFO - [c2b4b202-b382-4e12-aca6-32a385fd43ba] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:17:22 - INFO - [c2b4b202-b382-4e12-aca6-32a385fd43ba] 30 frames saved to temp_videos/c2b4b202-b382-4e12-aca6-32a385fd43ba +2025-08-19 00:17:23 - INFO - Prompt token length: 3584 +2025-08-19 00:17:39 - INFO - Tokens per second: 43.06015335799167, Peak GPU memory MB: 9376.375 +2025-08-19 00:17:39 - INFO - [c2b4b202-b382-4e12-aca6-32a385fd43ba] Inference time: 21.76 seconds, CPU usage: 47.5%, CPU core utilization: [37.8, 72.4, 30.5, 49.1] +2025-08-19 00:17:39 - INFO - [c2b4b202-b382-4e12-aca6-32a385fd43ba] Cleaned up temporary file: temp_videos/c2b4b202-b382-4e12-aca6-32a385fd43ba.mp4 +2025-08-19 00:17:39 - INFO - [c2b4b202-b382-4e12-aca6-32a385fd43ba] Cleaned up temporary frame directory: temp_videos/c2b4b202-b382-4e12-aca6-32a385fd43ba +2025-08-19 00:17:39 - INFO - [9f91a63e-4989-4419-a662-9537956ce823] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_069.mp4' +2025-08-19 00:17:39 - INFO - [9f91a63e-4989-4419-a662-9537956ce823] Video saved to temporary file: temp_videos/9f91a63e-4989-4419-a662-9537956ce823.mp4 +2025-08-19 00:17:39 - INFO - [9f91a63e-4989-4419-a662-9537956ce823] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:17:45 - INFO - [9f91a63e-4989-4419-a662-9537956ce823] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:17:45 - INFO - [9f91a63e-4989-4419-a662-9537956ce823] 30 frames saved to temp_videos/9f91a63e-4989-4419-a662-9537956ce823 +2025-08-19 00:17:45 - INFO - Prompt token length: 3584 +2025-08-19 00:18:02 - INFO - Tokens per second: 43.220403962218256, Peak GPU memory MB: 9376.375 +2025-08-19 00:18:02 - INFO - [9f91a63e-4989-4419-a662-9537956ce823] Inference time: 23.01 seconds, CPU usage: 47.0%, CPU core utilization: [33.4, 43.6, 31.7, 79.4] +2025-08-19 00:18:02 - INFO - [9f91a63e-4989-4419-a662-9537956ce823] Cleaned up temporary file: temp_videos/9f91a63e-4989-4419-a662-9537956ce823.mp4 +2025-08-19 00:18:02 - INFO - [9f91a63e-4989-4419-a662-9537956ce823] Cleaned up temporary frame directory: temp_videos/9f91a63e-4989-4419-a662-9537956ce823 +2025-08-19 00:18:02 - INFO - [46448195-307b-4e26-9899-fe9601ffd7e5] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_070.mp4' +2025-08-19 00:18:02 - INFO - [46448195-307b-4e26-9899-fe9601ffd7e5] Video saved to temporary file: temp_videos/46448195-307b-4e26-9899-fe9601ffd7e5.mp4 +2025-08-19 00:18:02 - INFO - [46448195-307b-4e26-9899-fe9601ffd7e5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:18:08 - INFO - [46448195-307b-4e26-9899-fe9601ffd7e5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:18:08 - INFO - [46448195-307b-4e26-9899-fe9601ffd7e5] 30 frames saved to temp_videos/46448195-307b-4e26-9899-fe9601ffd7e5 +2025-08-19 00:18:08 - INFO - Prompt token length: 3584 +2025-08-19 00:18:26 - INFO - Tokens per second: 43.39715561820535, Peak GPU memory MB: 9376.375 +2025-08-19 00:18:26 - INFO - [46448195-307b-4e26-9899-fe9601ffd7e5] Inference time: 24.00 seconds, CPU usage: 45.2%, CPU core utilization: [36.5, 33.6, 84.4, 26.3] +2025-08-19 00:18:26 - INFO - [46448195-307b-4e26-9899-fe9601ffd7e5] Cleaned up temporary file: temp_videos/46448195-307b-4e26-9899-fe9601ffd7e5.mp4 +2025-08-19 00:18:26 - INFO - [46448195-307b-4e26-9899-fe9601ffd7e5] Cleaned up temporary frame directory: temp_videos/46448195-307b-4e26-9899-fe9601ffd7e5 +2025-08-19 00:18:26 - INFO - [8d0f17d0-fb44-4984-91c2-16480b47a73b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_071.mp4' +2025-08-19 00:18:26 - INFO - [8d0f17d0-fb44-4984-91c2-16480b47a73b] Video saved to temporary file: temp_videos/8d0f17d0-fb44-4984-91c2-16480b47a73b.mp4 +2025-08-19 00:18:26 - INFO - [8d0f17d0-fb44-4984-91c2-16480b47a73b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:18:31 - INFO - [8d0f17d0-fb44-4984-91c2-16480b47a73b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:18:31 - INFO - [8d0f17d0-fb44-4984-91c2-16480b47a73b] 30 frames saved to temp_videos/8d0f17d0-fb44-4984-91c2-16480b47a73b +2025-08-19 00:18:31 - INFO - Prompt token length: 3584 +2025-08-19 00:18:48 - INFO - Tokens per second: 43.466487817577274, Peak GPU memory MB: 9376.375 +2025-08-19 00:18:48 - INFO - [8d0f17d0-fb44-4984-91c2-16480b47a73b] Inference time: 22.43 seconds, CPU usage: 45.6%, CPU core utilization: [38.0, 48.8, 35.4, 60.0] +2025-08-19 00:18:48 - INFO - [8d0f17d0-fb44-4984-91c2-16480b47a73b] Cleaned up temporary file: temp_videos/8d0f17d0-fb44-4984-91c2-16480b47a73b.mp4 +2025-08-19 00:18:48 - INFO - [8d0f17d0-fb44-4984-91c2-16480b47a73b] Cleaned up temporary frame directory: temp_videos/8d0f17d0-fb44-4984-91c2-16480b47a73b +2025-08-19 00:18:48 - INFO - [a4f9d9a4-cf24-40c5-ba7f-44c054938c14] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_072.mp4' +2025-08-19 00:18:48 - INFO - [a4f9d9a4-cf24-40c5-ba7f-44c054938c14] Video saved to temporary file: temp_videos/a4f9d9a4-cf24-40c5-ba7f-44c054938c14.mp4 +2025-08-19 00:18:48 - INFO - [a4f9d9a4-cf24-40c5-ba7f-44c054938c14] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:18:53 - INFO - [a4f9d9a4-cf24-40c5-ba7f-44c054938c14] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:18:53 - INFO - [a4f9d9a4-cf24-40c5-ba7f-44c054938c14] 30 frames saved to temp_videos/a4f9d9a4-cf24-40c5-ba7f-44c054938c14 +2025-08-19 00:18:53 - INFO - Prompt token length: 3584 +2025-08-19 00:19:11 - INFO - Tokens per second: 43.3955833964203, Peak GPU memory MB: 9376.375 +2025-08-19 00:19:11 - INFO - [a4f9d9a4-cf24-40c5-ba7f-44c054938c14] Inference time: 22.81 seconds, CPU usage: 43.0%, CPU core utilization: [47.9, 48.6, 53.5, 22.2] +2025-08-19 00:19:11 - INFO - [a4f9d9a4-cf24-40c5-ba7f-44c054938c14] Cleaned up temporary file: temp_videos/a4f9d9a4-cf24-40c5-ba7f-44c054938c14.mp4 +2025-08-19 00:19:11 - INFO - [a4f9d9a4-cf24-40c5-ba7f-44c054938c14] Cleaned up temporary frame directory: temp_videos/a4f9d9a4-cf24-40c5-ba7f-44c054938c14 +2025-08-19 00:19:11 - INFO - [c3f5d00e-14bb-4168-b922-b234e61f7c74] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_073.mp4' +2025-08-19 00:19:11 - INFO - [c3f5d00e-14bb-4168-b922-b234e61f7c74] Video saved to temporary file: temp_videos/c3f5d00e-14bb-4168-b922-b234e61f7c74.mp4 +2025-08-19 00:19:11 - INFO - [c3f5d00e-14bb-4168-b922-b234e61f7c74] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:19:16 - INFO - [c3f5d00e-14bb-4168-b922-b234e61f7c74] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:19:16 - INFO - [c3f5d00e-14bb-4168-b922-b234e61f7c74] 30 frames saved to temp_videos/c3f5d00e-14bb-4168-b922-b234e61f7c74 +2025-08-19 00:19:16 - INFO - Prompt token length: 3584 +2025-08-19 00:19:34 - INFO - Tokens per second: 43.392131145686385, Peak GPU memory MB: 9376.375 +2025-08-19 00:19:34 - INFO - [c3f5d00e-14bb-4168-b922-b234e61f7c74] Inference time: 23.08 seconds, CPU usage: 43.5%, CPU core utilization: [26.6, 21.1, 29.5, 96.7] +2025-08-19 00:19:34 - INFO - [c3f5d00e-14bb-4168-b922-b234e61f7c74] Cleaned up temporary file: temp_videos/c3f5d00e-14bb-4168-b922-b234e61f7c74.mp4 +2025-08-19 00:19:34 - INFO - [c3f5d00e-14bb-4168-b922-b234e61f7c74] Cleaned up temporary frame directory: temp_videos/c3f5d00e-14bb-4168-b922-b234e61f7c74 +2025-08-19 00:19:34 - INFO - [bc3d9f6b-4b51-4f4f-9a4f-b4ebeba187c5] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_074.mp4' +2025-08-19 00:19:34 - INFO - [bc3d9f6b-4b51-4f4f-9a4f-b4ebeba187c5] Video saved to temporary file: temp_videos/bc3d9f6b-4b51-4f4f-9a4f-b4ebeba187c5.mp4 +2025-08-19 00:19:34 - INFO - [bc3d9f6b-4b51-4f4f-9a4f-b4ebeba187c5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:19:39 - INFO - [bc3d9f6b-4b51-4f4f-9a4f-b4ebeba187c5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:19:39 - INFO - [bc3d9f6b-4b51-4f4f-9a4f-b4ebeba187c5] 30 frames saved to temp_videos/bc3d9f6b-4b51-4f4f-9a4f-b4ebeba187c5 +2025-08-19 00:19:40 - INFO - Prompt token length: 3584 +2025-08-19 00:19:58 - INFO - Tokens per second: 43.3240962507516, Peak GPU memory MB: 9376.375 +2025-08-19 00:19:58 - INFO - [bc3d9f6b-4b51-4f4f-9a4f-b4ebeba187c5] Inference time: 23.30 seconds, CPU usage: 43.9%, CPU core utilization: [22.3, 28.2, 98.5, 26.5] +2025-08-19 00:19:58 - INFO - [bc3d9f6b-4b51-4f4f-9a4f-b4ebeba187c5] Cleaned up temporary file: temp_videos/bc3d9f6b-4b51-4f4f-9a4f-b4ebeba187c5.mp4 +2025-08-19 00:19:58 - INFO - [bc3d9f6b-4b51-4f4f-9a4f-b4ebeba187c5] Cleaned up temporary frame directory: temp_videos/bc3d9f6b-4b51-4f4f-9a4f-b4ebeba187c5 +2025-08-19 00:19:58 - INFO - [cd056d45-e87f-4ce2-b4d5-860507269888] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_075.mp4' +2025-08-19 00:19:58 - INFO - [cd056d45-e87f-4ce2-b4d5-860507269888] Video saved to temporary file: temp_videos/cd056d45-e87f-4ce2-b4d5-860507269888.mp4 +2025-08-19 00:19:58 - INFO - [cd056d45-e87f-4ce2-b4d5-860507269888] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:20:02 - INFO - [cd056d45-e87f-4ce2-b4d5-860507269888] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:20:02 - INFO - [cd056d45-e87f-4ce2-b4d5-860507269888] 30 frames saved to temp_videos/cd056d45-e87f-4ce2-b4d5-860507269888 +2025-08-19 00:20:03 - INFO - Prompt token length: 3584 +2025-08-19 00:20:20 - INFO - Tokens per second: 42.86824700295901, Peak GPU memory MB: 9376.375 +2025-08-19 00:20:20 - INFO - [cd056d45-e87f-4ce2-b4d5-860507269888] Inference time: 22.13 seconds, CPU usage: 55.2%, CPU core utilization: [43.2, 53.7, 44.1, 79.5] +2025-08-19 00:20:20 - INFO - [cd056d45-e87f-4ce2-b4d5-860507269888] Cleaned up temporary file: temp_videos/cd056d45-e87f-4ce2-b4d5-860507269888.mp4 +2025-08-19 00:20:20 - INFO - [cd056d45-e87f-4ce2-b4d5-860507269888] Cleaned up temporary frame directory: temp_videos/cd056d45-e87f-4ce2-b4d5-860507269888 +2025-08-19 00:20:20 - INFO - [8710e2cb-db96-4247-bacf-f6e08cd525c4] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_076.mp4' +2025-08-19 00:20:20 - INFO - [8710e2cb-db96-4247-bacf-f6e08cd525c4] Video saved to temporary file: temp_videos/8710e2cb-db96-4247-bacf-f6e08cd525c4.mp4 +2025-08-19 00:20:20 - INFO - [8710e2cb-db96-4247-bacf-f6e08cd525c4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:20:25 - INFO - [8710e2cb-db96-4247-bacf-f6e08cd525c4] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:20:25 - INFO - [8710e2cb-db96-4247-bacf-f6e08cd525c4] 30 frames saved to temp_videos/8710e2cb-db96-4247-bacf-f6e08cd525c4 +2025-08-19 00:20:25 - INFO - Prompt token length: 3584 +2025-08-19 00:20:43 - INFO - Tokens per second: 43.49618548005215, Peak GPU memory MB: 9376.375 +2025-08-19 00:20:43 - INFO - [8710e2cb-db96-4247-bacf-f6e08cd525c4] Inference time: 23.40 seconds, CPU usage: 42.5%, CPU core utilization: [21.7, 50.0, 55.7, 42.7] +2025-08-19 00:20:43 - INFO - [8710e2cb-db96-4247-bacf-f6e08cd525c4] Cleaned up temporary file: temp_videos/8710e2cb-db96-4247-bacf-f6e08cd525c4.mp4 +2025-08-19 00:20:43 - INFO - [8710e2cb-db96-4247-bacf-f6e08cd525c4] Cleaned up temporary frame directory: temp_videos/8710e2cb-db96-4247-bacf-f6e08cd525c4 +2025-08-19 00:20:43 - INFO - [8fb8026c-6a91-4b73-9823-42b217a109d8] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_077.mp4' +2025-08-19 00:20:43 - INFO - [8fb8026c-6a91-4b73-9823-42b217a109d8] Video saved to temporary file: temp_videos/8fb8026c-6a91-4b73-9823-42b217a109d8.mp4 +2025-08-19 00:20:43 - INFO - [8fb8026c-6a91-4b73-9823-42b217a109d8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:20:49 - INFO - [8fb8026c-6a91-4b73-9823-42b217a109d8] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:20:49 - INFO - [8fb8026c-6a91-4b73-9823-42b217a109d8] 30 frames saved to temp_videos/8fb8026c-6a91-4b73-9823-42b217a109d8 +2025-08-19 00:20:49 - INFO - Prompt token length: 3584 +2025-08-19 00:21:07 - INFO - Tokens per second: 43.25714331377744, Peak GPU memory MB: 9376.375 +2025-08-19 00:21:07 - INFO - [8fb8026c-6a91-4b73-9823-42b217a109d8] Inference time: 23.83 seconds, CPU usage: 44.5%, CPU core utilization: [62.2, 35.9, 52.0, 28.0] +2025-08-19 00:21:07 - INFO - [8fb8026c-6a91-4b73-9823-42b217a109d8] Cleaned up temporary file: temp_videos/8fb8026c-6a91-4b73-9823-42b217a109d8.mp4 +2025-08-19 00:21:07 - INFO - [8fb8026c-6a91-4b73-9823-42b217a109d8] Cleaned up temporary frame directory: temp_videos/8fb8026c-6a91-4b73-9823-42b217a109d8 +2025-08-19 00:21:07 - INFO - [7987e226-46f3-42b2-8cf6-2808d03557f0] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_078.mp4' +2025-08-19 00:21:07 - INFO - [7987e226-46f3-42b2-8cf6-2808d03557f0] Video saved to temporary file: temp_videos/7987e226-46f3-42b2-8cf6-2808d03557f0.mp4 +2025-08-19 00:21:07 - INFO - [7987e226-46f3-42b2-8cf6-2808d03557f0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:21:13 - INFO - [7987e226-46f3-42b2-8cf6-2808d03557f0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:21:13 - INFO - [7987e226-46f3-42b2-8cf6-2808d03557f0] 30 frames saved to temp_videos/7987e226-46f3-42b2-8cf6-2808d03557f0 +2025-08-19 00:21:13 - INFO - Prompt token length: 3584 +2025-08-19 00:21:30 - INFO - Tokens per second: 43.85060391397791, Peak GPU memory MB: 9376.375 +2025-08-19 00:21:30 - INFO - [7987e226-46f3-42b2-8cf6-2808d03557f0] Inference time: 23.16 seconds, CPU usage: 50.2%, CPU core utilization: [55.2, 37.0, 71.3, 37.2] +2025-08-19 00:21:30 - INFO - [7987e226-46f3-42b2-8cf6-2808d03557f0] Cleaned up temporary file: temp_videos/7987e226-46f3-42b2-8cf6-2808d03557f0.mp4 +2025-08-19 00:21:30 - INFO - [7987e226-46f3-42b2-8cf6-2808d03557f0] Cleaned up temporary frame directory: temp_videos/7987e226-46f3-42b2-8cf6-2808d03557f0 +2025-08-19 00:21:30 - INFO - [ec6d07c4-6500-441b-9f4b-f8c8984ec77c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_079.mp4' +2025-08-19 00:21:30 - INFO - [ec6d07c4-6500-441b-9f4b-f8c8984ec77c] Video saved to temporary file: temp_videos/ec6d07c4-6500-441b-9f4b-f8c8984ec77c.mp4 +2025-08-19 00:21:30 - INFO - [ec6d07c4-6500-441b-9f4b-f8c8984ec77c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:21:36 - INFO - [ec6d07c4-6500-441b-9f4b-f8c8984ec77c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:21:36 - INFO - [ec6d07c4-6500-441b-9f4b-f8c8984ec77c] 30 frames saved to temp_videos/ec6d07c4-6500-441b-9f4b-f8c8984ec77c +2025-08-19 00:21:37 - INFO - Prompt token length: 3584 +2025-08-19 00:21:54 - INFO - Tokens per second: 43.467235033644464, Peak GPU memory MB: 9376.375 +2025-08-19 00:21:54 - INFO - [ec6d07c4-6500-441b-9f4b-f8c8984ec77c] Inference time: 23.67 seconds, CPU usage: 47.1%, CPU core utilization: [33.4, 42.8, 32.2, 80.0] +2025-08-19 00:21:54 - INFO - [ec6d07c4-6500-441b-9f4b-f8c8984ec77c] Cleaned up temporary file: temp_videos/ec6d07c4-6500-441b-9f4b-f8c8984ec77c.mp4 +2025-08-19 00:21:54 - INFO - [ec6d07c4-6500-441b-9f4b-f8c8984ec77c] Cleaned up temporary frame directory: temp_videos/ec6d07c4-6500-441b-9f4b-f8c8984ec77c +2025-08-19 00:21:54 - INFO - [829837ca-1d2c-47b2-9b72-96ddd902a942] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_080.mp4' +2025-08-19 00:21:54 - INFO - [829837ca-1d2c-47b2-9b72-96ddd902a942] Video saved to temporary file: temp_videos/829837ca-1d2c-47b2-9b72-96ddd902a942.mp4 +2025-08-19 00:21:54 - INFO - [829837ca-1d2c-47b2-9b72-96ddd902a942] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:22:00 - INFO - [829837ca-1d2c-47b2-9b72-96ddd902a942] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:22:00 - INFO - [829837ca-1d2c-47b2-9b72-96ddd902a942] 30 frames saved to temp_videos/829837ca-1d2c-47b2-9b72-96ddd902a942 +2025-08-19 00:22:00 - INFO - Prompt token length: 3584 +2025-08-19 00:22:18 - INFO - Tokens per second: 42.13491054305142, Peak GPU memory MB: 9376.375 +2025-08-19 00:22:18 - INFO - [829837ca-1d2c-47b2-9b72-96ddd902a942] Inference time: 23.48 seconds, CPU usage: 55.8%, CPU core utilization: [52.7, 49.7, 55.4, 65.4] +2025-08-19 00:22:18 - INFO - [829837ca-1d2c-47b2-9b72-96ddd902a942] Cleaned up temporary file: temp_videos/829837ca-1d2c-47b2-9b72-96ddd902a942.mp4 +2025-08-19 00:22:18 - INFO - [829837ca-1d2c-47b2-9b72-96ddd902a942] Cleaned up temporary frame directory: temp_videos/829837ca-1d2c-47b2-9b72-96ddd902a942 +2025-08-19 00:22:18 - INFO - [6eeade54-04c2-4611-80cb-e58e2efcdd07] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_081.mp4' +2025-08-19 00:22:18 - INFO - [6eeade54-04c2-4611-80cb-e58e2efcdd07] Video saved to temporary file: temp_videos/6eeade54-04c2-4611-80cb-e58e2efcdd07.mp4 +2025-08-19 00:22:18 - INFO - [6eeade54-04c2-4611-80cb-e58e2efcdd07] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:22:25 - INFO - [6eeade54-04c2-4611-80cb-e58e2efcdd07] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:22:25 - INFO - [6eeade54-04c2-4611-80cb-e58e2efcdd07] 30 frames saved to temp_videos/6eeade54-04c2-4611-80cb-e58e2efcdd07 +2025-08-19 00:22:25 - INFO - Prompt token length: 3584 +2025-08-19 00:22:43 - INFO - Tokens per second: 43.3165118354319, Peak GPU memory MB: 9376.375 +2025-08-19 00:22:43 - INFO - [6eeade54-04c2-4611-80cb-e58e2efcdd07] Inference time: 25.15 seconds, CPU usage: 51.4%, CPU core utilization: [43.1, 59.2, 50.8, 52.6] +2025-08-19 00:22:43 - INFO - [6eeade54-04c2-4611-80cb-e58e2efcdd07] Cleaned up temporary file: temp_videos/6eeade54-04c2-4611-80cb-e58e2efcdd07.mp4 +2025-08-19 00:22:43 - INFO - [6eeade54-04c2-4611-80cb-e58e2efcdd07] Cleaned up temporary frame directory: temp_videos/6eeade54-04c2-4611-80cb-e58e2efcdd07 +2025-08-19 00:22:43 - INFO - [9ecb8db2-0c55-4ac4-9982-0240c4e27ebe] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_082.mp4' +2025-08-19 00:22:43 - INFO - [9ecb8db2-0c55-4ac4-9982-0240c4e27ebe] Video saved to temporary file: temp_videos/9ecb8db2-0c55-4ac4-9982-0240c4e27ebe.mp4 +2025-08-19 00:22:43 - INFO - [9ecb8db2-0c55-4ac4-9982-0240c4e27ebe] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:22:48 - INFO - [9ecb8db2-0c55-4ac4-9982-0240c4e27ebe] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:22:48 - INFO - [9ecb8db2-0c55-4ac4-9982-0240c4e27ebe] 30 frames saved to temp_videos/9ecb8db2-0c55-4ac4-9982-0240c4e27ebe +2025-08-19 00:22:48 - INFO - Prompt token length: 3584 +2025-08-19 00:23:07 - INFO - Tokens per second: 41.074128731433575, Peak GPU memory MB: 9376.375 +2025-08-19 00:23:07 - INFO - [9ecb8db2-0c55-4ac4-9982-0240c4e27ebe] Inference time: 23.64 seconds, CPU usage: 51.6%, CPU core utilization: [37.1, 50.6, 42.3, 76.5] +2025-08-19 00:23:07 - INFO - [9ecb8db2-0c55-4ac4-9982-0240c4e27ebe] Cleaned up temporary file: temp_videos/9ecb8db2-0c55-4ac4-9982-0240c4e27ebe.mp4 +2025-08-19 00:23:07 - INFO - [9ecb8db2-0c55-4ac4-9982-0240c4e27ebe] Cleaned up temporary frame directory: temp_videos/9ecb8db2-0c55-4ac4-9982-0240c4e27ebe +2025-08-19 00:23:07 - INFO - [4cdad013-9d89-43fb-8160-46382ffc553c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_083.mp4' +2025-08-19 00:23:07 - INFO - [4cdad013-9d89-43fb-8160-46382ffc553c] Video saved to temporary file: temp_videos/4cdad013-9d89-43fb-8160-46382ffc553c.mp4 +2025-08-19 00:23:07 - INFO - [4cdad013-9d89-43fb-8160-46382ffc553c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:23:12 - INFO - [4cdad013-9d89-43fb-8160-46382ffc553c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:23:12 - INFO - [4cdad013-9d89-43fb-8160-46382ffc553c] 30 frames saved to temp_videos/4cdad013-9d89-43fb-8160-46382ffc553c +2025-08-19 00:23:12 - INFO - Prompt token length: 3584 +2025-08-19 00:23:29 - INFO - Tokens per second: 43.41314970326025, Peak GPU memory MB: 9376.375 +2025-08-19 00:23:29 - INFO - [4cdad013-9d89-43fb-8160-46382ffc553c] Inference time: 22.28 seconds, CPU usage: 46.5%, CPU core utilization: [34.0, 42.8, 31.5, 77.8] +2025-08-19 00:23:29 - INFO - [4cdad013-9d89-43fb-8160-46382ffc553c] Cleaned up temporary file: temp_videos/4cdad013-9d89-43fb-8160-46382ffc553c.mp4 +2025-08-19 00:23:29 - INFO - [4cdad013-9d89-43fb-8160-46382ffc553c] Cleaned up temporary frame directory: temp_videos/4cdad013-9d89-43fb-8160-46382ffc553c +2025-08-19 00:23:29 - INFO - [7c6b4a6c-2ec8-4e91-aa5a-2f3335776ca4] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_084.mp4' +2025-08-19 00:23:29 - INFO - [7c6b4a6c-2ec8-4e91-aa5a-2f3335776ca4] Video saved to temporary file: temp_videos/7c6b4a6c-2ec8-4e91-aa5a-2f3335776ca4.mp4 +2025-08-19 00:23:29 - INFO - [7c6b4a6c-2ec8-4e91-aa5a-2f3335776ca4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:23:34 - INFO - [7c6b4a6c-2ec8-4e91-aa5a-2f3335776ca4] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:23:34 - INFO - [7c6b4a6c-2ec8-4e91-aa5a-2f3335776ca4] 30 frames saved to temp_videos/7c6b4a6c-2ec8-4e91-aa5a-2f3335776ca4 +2025-08-19 00:23:34 - INFO - Prompt token length: 3584 +2025-08-19 00:23:51 - INFO - Tokens per second: 43.62598508461413, Peak GPU memory MB: 9376.375 +2025-08-19 00:23:51 - INFO - [7c6b4a6c-2ec8-4e91-aa5a-2f3335776ca4] Inference time: 22.26 seconds, CPU usage: 46.9%, CPU core utilization: [81.5, 39.0, 38.0, 29.1] +2025-08-19 00:23:51 - INFO - [7c6b4a6c-2ec8-4e91-aa5a-2f3335776ca4] Cleaned up temporary file: temp_videos/7c6b4a6c-2ec8-4e91-aa5a-2f3335776ca4.mp4 +2025-08-19 00:23:51 - INFO - [7c6b4a6c-2ec8-4e91-aa5a-2f3335776ca4] Cleaned up temporary frame directory: temp_videos/7c6b4a6c-2ec8-4e91-aa5a-2f3335776ca4 +2025-08-19 00:23:51 - INFO - [07466a28-800d-4972-acf4-94559053861e] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_085.mp4' +2025-08-19 00:23:51 - INFO - [07466a28-800d-4972-acf4-94559053861e] Video saved to temporary file: temp_videos/07466a28-800d-4972-acf4-94559053861e.mp4 +2025-08-19 00:23:51 - INFO - [07466a28-800d-4972-acf4-94559053861e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:23:57 - INFO - [07466a28-800d-4972-acf4-94559053861e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:23:57 - INFO - [07466a28-800d-4972-acf4-94559053861e] 30 frames saved to temp_videos/07466a28-800d-4972-acf4-94559053861e +2025-08-19 00:23:57 - INFO - Prompt token length: 3584 +2025-08-19 00:24:14 - INFO - Tokens per second: 42.46289527866258, Peak GPU memory MB: 9376.375 +2025-08-19 00:24:14 - INFO - [07466a28-800d-4972-acf4-94559053861e] Inference time: 22.80 seconds, CPU usage: 51.1%, CPU core utilization: [42.8, 65.0, 52.8, 43.9] +2025-08-19 00:24:14 - INFO - [07466a28-800d-4972-acf4-94559053861e] Cleaned up temporary file: temp_videos/07466a28-800d-4972-acf4-94559053861e.mp4 +2025-08-19 00:24:14 - INFO - [07466a28-800d-4972-acf4-94559053861e] Cleaned up temporary frame directory: temp_videos/07466a28-800d-4972-acf4-94559053861e +2025-08-19 00:24:14 - INFO - [56cb38a2-66c4-43e8-bc7d-31b8cb08aff8] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_086.mp4' +2025-08-19 00:24:14 - INFO - [56cb38a2-66c4-43e8-bc7d-31b8cb08aff8] Video saved to temporary file: temp_videos/56cb38a2-66c4-43e8-bc7d-31b8cb08aff8.mp4 +2025-08-19 00:24:14 - INFO - [56cb38a2-66c4-43e8-bc7d-31b8cb08aff8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:24:20 - INFO - [56cb38a2-66c4-43e8-bc7d-31b8cb08aff8] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:24:20 - INFO - [56cb38a2-66c4-43e8-bc7d-31b8cb08aff8] 30 frames saved to temp_videos/56cb38a2-66c4-43e8-bc7d-31b8cb08aff8 +2025-08-19 00:24:20 - INFO - Prompt token length: 3584 +2025-08-19 00:24:38 - INFO - Tokens per second: 37.8510224115901, Peak GPU memory MB: 9376.375 +2025-08-19 00:24:38 - INFO - [56cb38a2-66c4-43e8-bc7d-31b8cb08aff8] Inference time: 23.45 seconds, CPU usage: 58.0%, CPU core utilization: [55.1, 48.7, 80.1, 48.1] +2025-08-19 00:24:38 - INFO - [56cb38a2-66c4-43e8-bc7d-31b8cb08aff8] Cleaned up temporary file: temp_videos/56cb38a2-66c4-43e8-bc7d-31b8cb08aff8.mp4 +2025-08-19 00:24:38 - INFO - [56cb38a2-66c4-43e8-bc7d-31b8cb08aff8] Cleaned up temporary frame directory: temp_videos/56cb38a2-66c4-43e8-bc7d-31b8cb08aff8 +2025-08-19 00:24:38 - INFO - [29fb6369-8944-4456-b037-5a218569fc63] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_087.mp4' +2025-08-19 00:24:38 - INFO - [29fb6369-8944-4456-b037-5a218569fc63] Video saved to temporary file: temp_videos/29fb6369-8944-4456-b037-5a218569fc63.mp4 +2025-08-19 00:24:38 - INFO - [29fb6369-8944-4456-b037-5a218569fc63] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:24:43 - INFO - [29fb6369-8944-4456-b037-5a218569fc63] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:24:43 - INFO - [29fb6369-8944-4456-b037-5a218569fc63] 30 frames saved to temp_videos/29fb6369-8944-4456-b037-5a218569fc63 +2025-08-19 00:24:44 - INFO - Prompt token length: 3584 +2025-08-19 00:25:01 - INFO - Tokens per second: 43.779372271828834, Peak GPU memory MB: 9376.375 +2025-08-19 00:25:01 - INFO - [29fb6369-8944-4456-b037-5a218569fc63] Inference time: 23.15 seconds, CPU usage: 46.2%, CPU core utilization: [80.2, 32.2, 43.0, 29.4] +2025-08-19 00:25:01 - INFO - [29fb6369-8944-4456-b037-5a218569fc63] Cleaned up temporary file: temp_videos/29fb6369-8944-4456-b037-5a218569fc63.mp4 +2025-08-19 00:25:01 - INFO - [29fb6369-8944-4456-b037-5a218569fc63] Cleaned up temporary frame directory: temp_videos/29fb6369-8944-4456-b037-5a218569fc63 +2025-08-19 00:25:01 - INFO - [4112d4a0-25b6-4de3-b8df-07482266ea12] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_088.mp4' +2025-08-19 00:25:01 - INFO - [4112d4a0-25b6-4de3-b8df-07482266ea12] Video saved to temporary file: temp_videos/4112d4a0-25b6-4de3-b8df-07482266ea12.mp4 +2025-08-19 00:25:01 - INFO - [4112d4a0-25b6-4de3-b8df-07482266ea12] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:25:06 - INFO - [4112d4a0-25b6-4de3-b8df-07482266ea12] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:25:06 - INFO - [4112d4a0-25b6-4de3-b8df-07482266ea12] 30 frames saved to temp_videos/4112d4a0-25b6-4de3-b8df-07482266ea12 +2025-08-19 00:25:07 - INFO - Prompt token length: 3584 +2025-08-19 00:25:23 - INFO - Tokens per second: 43.907938544856734, Peak GPU memory MB: 9376.375 +2025-08-19 00:25:23 - INFO - [4112d4a0-25b6-4de3-b8df-07482266ea12] Inference time: 22.60 seconds, CPU usage: 45.6%, CPU core utilization: [60.0, 28.6, 61.1, 32.8] +2025-08-19 00:25:23 - INFO - [4112d4a0-25b6-4de3-b8df-07482266ea12] Cleaned up temporary file: temp_videos/4112d4a0-25b6-4de3-b8df-07482266ea12.mp4 +2025-08-19 00:25:23 - INFO - [4112d4a0-25b6-4de3-b8df-07482266ea12] Cleaned up temporary frame directory: temp_videos/4112d4a0-25b6-4de3-b8df-07482266ea12 +2025-08-19 00:25:23 - INFO - [2aa859c2-524a-4b40-b3f1-11f7d2c09c8d] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_089.mp4' +2025-08-19 00:25:23 - INFO - [2aa859c2-524a-4b40-b3f1-11f7d2c09c8d] Video saved to temporary file: temp_videos/2aa859c2-524a-4b40-b3f1-11f7d2c09c8d.mp4 +2025-08-19 00:25:23 - INFO - [2aa859c2-524a-4b40-b3f1-11f7d2c09c8d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:25:28 - INFO - [2aa859c2-524a-4b40-b3f1-11f7d2c09c8d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:25:28 - INFO - [2aa859c2-524a-4b40-b3f1-11f7d2c09c8d] 30 frames saved to temp_videos/2aa859c2-524a-4b40-b3f1-11f7d2c09c8d +2025-08-19 00:25:29 - INFO - Prompt token length: 3584 +2025-08-19 00:25:46 - INFO - Tokens per second: 42.785801642658335, Peak GPU memory MB: 9376.375 +2025-08-19 00:25:46 - INFO - [2aa859c2-524a-4b40-b3f1-11f7d2c09c8d] Inference time: 22.84 seconds, CPU usage: 45.7%, CPU core utilization: [34.4, 47.3, 59.1, 42.1] +2025-08-19 00:25:46 - INFO - [2aa859c2-524a-4b40-b3f1-11f7d2c09c8d] Cleaned up temporary file: temp_videos/2aa859c2-524a-4b40-b3f1-11f7d2c09c8d.mp4 +2025-08-19 00:25:46 - INFO - [2aa859c2-524a-4b40-b3f1-11f7d2c09c8d] Cleaned up temporary frame directory: temp_videos/2aa859c2-524a-4b40-b3f1-11f7d2c09c8d +2025-08-19 00:25:46 - INFO - [3f587794-15a1-4bb7-9460-aa4cda103176] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_090.mp4' +2025-08-19 00:25:46 - INFO - [3f587794-15a1-4bb7-9460-aa4cda103176] Video saved to temporary file: temp_videos/3f587794-15a1-4bb7-9460-aa4cda103176.mp4 +2025-08-19 00:25:46 - INFO - [3f587794-15a1-4bb7-9460-aa4cda103176] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:25:52 - INFO - [3f587794-15a1-4bb7-9460-aa4cda103176] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:25:52 - INFO - [3f587794-15a1-4bb7-9460-aa4cda103176] 30 frames saved to temp_videos/3f587794-15a1-4bb7-9460-aa4cda103176 +2025-08-19 00:25:52 - INFO - Prompt token length: 3584 +2025-08-19 00:26:10 - INFO - Tokens per second: 43.70943500244427, Peak GPU memory MB: 9376.375 +2025-08-19 00:26:10 - INFO - [3f587794-15a1-4bb7-9460-aa4cda103176] Inference time: 23.45 seconds, CPU usage: 45.4%, CPU core utilization: [29.0, 72.4, 32.0, 48.4] +2025-08-19 00:26:10 - INFO - [3f587794-15a1-4bb7-9460-aa4cda103176] Cleaned up temporary file: temp_videos/3f587794-15a1-4bb7-9460-aa4cda103176.mp4 +2025-08-19 00:26:10 - INFO - [3f587794-15a1-4bb7-9460-aa4cda103176] Cleaned up temporary frame directory: temp_videos/3f587794-15a1-4bb7-9460-aa4cda103176 +2025-08-19 00:26:10 - INFO - [5b07e272-fc1d-4f43-9786-5c6813642a9a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_091.mp4' +2025-08-19 00:26:10 - INFO - [5b07e272-fc1d-4f43-9786-5c6813642a9a] Video saved to temporary file: temp_videos/5b07e272-fc1d-4f43-9786-5c6813642a9a.mp4 +2025-08-19 00:26:10 - INFO - [5b07e272-fc1d-4f43-9786-5c6813642a9a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:26:14 - INFO - [5b07e272-fc1d-4f43-9786-5c6813642a9a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:26:14 - INFO - [5b07e272-fc1d-4f43-9786-5c6813642a9a] 30 frames saved to temp_videos/5b07e272-fc1d-4f43-9786-5c6813642a9a +2025-08-19 00:26:15 - INFO - Prompt token length: 3584 +2025-08-19 00:26:32 - INFO - Tokens per second: 43.53166357065771, Peak GPU memory MB: 9376.375 +2025-08-19 00:26:32 - INFO - [5b07e272-fc1d-4f43-9786-5c6813642a9a] Inference time: 22.16 seconds, CPU usage: 43.6%, CPU core utilization: [61.1, 56.1, 30.1, 27.1] +2025-08-19 00:26:32 - INFO - [5b07e272-fc1d-4f43-9786-5c6813642a9a] Cleaned up temporary file: temp_videos/5b07e272-fc1d-4f43-9786-5c6813642a9a.mp4 +2025-08-19 00:26:32 - INFO - [5b07e272-fc1d-4f43-9786-5c6813642a9a] Cleaned up temporary frame directory: temp_videos/5b07e272-fc1d-4f43-9786-5c6813642a9a +2025-08-19 00:26:32 - INFO - [d95c67fa-4234-48f7-a319-43376e2ad8c0] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_092.mp4' +2025-08-19 00:26:32 - INFO - [d95c67fa-4234-48f7-a319-43376e2ad8c0] Video saved to temporary file: temp_videos/d95c67fa-4234-48f7-a319-43376e2ad8c0.mp4 +2025-08-19 00:26:32 - INFO - [d95c67fa-4234-48f7-a319-43376e2ad8c0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:26:36 - INFO - [d95c67fa-4234-48f7-a319-43376e2ad8c0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:26:36 - INFO - [d95c67fa-4234-48f7-a319-43376e2ad8c0] 30 frames saved to temp_videos/d95c67fa-4234-48f7-a319-43376e2ad8c0 +2025-08-19 00:26:36 - INFO - Prompt token length: 3584 +2025-08-19 00:26:54 - INFO - Tokens per second: 43.549125338189285, Peak GPU memory MB: 9376.375 +2025-08-19 00:26:54 - INFO - [d95c67fa-4234-48f7-a319-43376e2ad8c0] Inference time: 22.23 seconds, CPU usage: 40.9%, CPU core utilization: [24.2, 48.0, 24.0, 67.3] +2025-08-19 00:26:54 - INFO - [d95c67fa-4234-48f7-a319-43376e2ad8c0] Cleaned up temporary file: temp_videos/d95c67fa-4234-48f7-a319-43376e2ad8c0.mp4 +2025-08-19 00:26:54 - INFO - [d95c67fa-4234-48f7-a319-43376e2ad8c0] Cleaned up temporary frame directory: temp_videos/d95c67fa-4234-48f7-a319-43376e2ad8c0 +2025-08-19 00:26:54 - INFO - [e8c39e39-19fd-4ed4-af31-c9e9eb39e7fb] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_093.mp4' +2025-08-19 00:26:54 - INFO - [e8c39e39-19fd-4ed4-af31-c9e9eb39e7fb] Video saved to temporary file: temp_videos/e8c39e39-19fd-4ed4-af31-c9e9eb39e7fb.mp4 +2025-08-19 00:26:54 - INFO - [e8c39e39-19fd-4ed4-af31-c9e9eb39e7fb] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:26:57 - INFO - [e8c39e39-19fd-4ed4-af31-c9e9eb39e7fb] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:26:57 - INFO - [e8c39e39-19fd-4ed4-af31-c9e9eb39e7fb] 30 frames saved to temp_videos/e8c39e39-19fd-4ed4-af31-c9e9eb39e7fb +2025-08-19 00:26:57 - INFO - Prompt token length: 3584 +2025-08-19 00:27:13 - INFO - Tokens per second: 43.81067232421721, Peak GPU memory MB: 9376.375 +2025-08-19 00:27:13 - INFO - [e8c39e39-19fd-4ed4-af31-c9e9eb39e7fb] Inference time: 19.25 seconds, CPU usage: 37.6%, CPU core utilization: [45.4, 47.1, 34.6, 23.3] +2025-08-19 00:27:13 - INFO - [e8c39e39-19fd-4ed4-af31-c9e9eb39e7fb] Cleaned up temporary file: temp_videos/e8c39e39-19fd-4ed4-af31-c9e9eb39e7fb.mp4 +2025-08-19 00:27:13 - INFO - [e8c39e39-19fd-4ed4-af31-c9e9eb39e7fb] Cleaned up temporary frame directory: temp_videos/e8c39e39-19fd-4ed4-af31-c9e9eb39e7fb +2025-08-19 00:27:14 - INFO - [a580b037-2332-41c4-8083-1528607b4723] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_094.mp4' +2025-08-19 00:27:14 - INFO - [a580b037-2332-41c4-8083-1528607b4723] Video saved to temporary file: temp_videos/a580b037-2332-41c4-8083-1528607b4723.mp4 +2025-08-19 00:27:14 - INFO - [a580b037-2332-41c4-8083-1528607b4723] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:27:16 - INFO - [a580b037-2332-41c4-8083-1528607b4723] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:27:16 - INFO - [a580b037-2332-41c4-8083-1528607b4723] 30 frames saved to temp_videos/a580b037-2332-41c4-8083-1528607b4723 +2025-08-19 00:27:16 - INFO - Prompt token length: 3584 +2025-08-19 00:27:33 - INFO - Tokens per second: 43.90407130082045, Peak GPU memory MB: 9376.375 +2025-08-19 00:27:33 - INFO - [a580b037-2332-41c4-8083-1528607b4723] Inference time: 19.47 seconds, CPU usage: 36.5%, CPU core utilization: [26.0, 45.5, 18.2, 56.1] +2025-08-19 00:27:33 - INFO - [a580b037-2332-41c4-8083-1528607b4723] Cleaned up temporary file: temp_videos/a580b037-2332-41c4-8083-1528607b4723.mp4 +2025-08-19 00:27:33 - INFO - [a580b037-2332-41c4-8083-1528607b4723] Cleaned up temporary frame directory: temp_videos/a580b037-2332-41c4-8083-1528607b4723 +2025-08-19 00:27:33 - INFO - [0a5c1c7e-57f5-457e-9ff4-2b101994b9fa] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-19 00:27:33 - INFO - [0a5c1c7e-57f5-457e-9ff4-2b101994b9fa] Video saved to temporary file: temp_videos/0a5c1c7e-57f5-457e-9ff4-2b101994b9fa.mp4 +2025-08-19 00:27:33 - INFO - [0a5c1c7e-57f5-457e-9ff4-2b101994b9fa] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:27:37 - INFO - [0a5c1c7e-57f5-457e-9ff4-2b101994b9fa] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:27:37 - INFO - [0a5c1c7e-57f5-457e-9ff4-2b101994b9fa] 30 frames saved to temp_videos/0a5c1c7e-57f5-457e-9ff4-2b101994b9fa +2025-08-19 00:27:37 - INFO - Prompt token length: 3584 +2025-08-19 00:27:54 - INFO - Tokens per second: 43.63059232470235, Peak GPU memory MB: 9376.375 +2025-08-19 00:27:54 - INFO - [0a5c1c7e-57f5-457e-9ff4-2b101994b9fa] Inference time: 21.40 seconds, CPU usage: 41.1%, CPU core utilization: [21.9, 68.3, 27.7, 46.5] +2025-08-19 00:27:54 - INFO - [0a5c1c7e-57f5-457e-9ff4-2b101994b9fa] Cleaned up temporary file: temp_videos/0a5c1c7e-57f5-457e-9ff4-2b101994b9fa.mp4 +2025-08-19 00:27:54 - INFO - [0a5c1c7e-57f5-457e-9ff4-2b101994b9fa] Cleaned up temporary frame directory: temp_videos/0a5c1c7e-57f5-457e-9ff4-2b101994b9fa +2025-08-19 00:27:55 - INFO - [e9ad7164-75d4-4ff0-a8b5-538233540578] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_002.mp4' +2025-08-19 00:27:55 - INFO - [e9ad7164-75d4-4ff0-a8b5-538233540578] Video saved to temporary file: temp_videos/e9ad7164-75d4-4ff0-a8b5-538233540578.mp4 +2025-08-19 00:27:55 - INFO - [e9ad7164-75d4-4ff0-a8b5-538233540578] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:28:01 - INFO - [e9ad7164-75d4-4ff0-a8b5-538233540578] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:28:01 - INFO - [e9ad7164-75d4-4ff0-a8b5-538233540578] 30 frames saved to temp_videos/e9ad7164-75d4-4ff0-a8b5-538233540578 +2025-08-19 00:28:01 - INFO - Prompt token length: 3584 +2025-08-19 00:28:18 - INFO - Tokens per second: 43.51528055992275, Peak GPU memory MB: 9376.375 +2025-08-19 00:28:18 - INFO - [e9ad7164-75d4-4ff0-a8b5-538233540578] Inference time: 23.43 seconds, CPU usage: 46.0%, CPU core utilization: [42.4, 66.6, 33.9, 41.1] +2025-08-19 00:28:18 - INFO - [e9ad7164-75d4-4ff0-a8b5-538233540578] Cleaned up temporary file: temp_videos/e9ad7164-75d4-4ff0-a8b5-538233540578.mp4 +2025-08-19 00:28:18 - INFO - [e9ad7164-75d4-4ff0-a8b5-538233540578] Cleaned up temporary frame directory: temp_videos/e9ad7164-75d4-4ff0-a8b5-538233540578 +2025-08-19 00:28:18 - INFO - [e030af98-0a2d-4526-aa94-526fc498572a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_003.mp4' +2025-08-19 00:28:18 - INFO - [e030af98-0a2d-4526-aa94-526fc498572a] Video saved to temporary file: temp_videos/e030af98-0a2d-4526-aa94-526fc498572a.mp4 +2025-08-19 00:28:18 - INFO - [e030af98-0a2d-4526-aa94-526fc498572a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:28:23 - INFO - [e030af98-0a2d-4526-aa94-526fc498572a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:28:23 - INFO - [e030af98-0a2d-4526-aa94-526fc498572a] 30 frames saved to temp_videos/e030af98-0a2d-4526-aa94-526fc498572a +2025-08-19 00:28:24 - INFO - Prompt token length: 3584 +2025-08-19 00:28:41 - INFO - Tokens per second: 43.14480825013987, Peak GPU memory MB: 9376.375 +2025-08-19 00:28:41 - INFO - [e030af98-0a2d-4526-aa94-526fc498572a] Inference time: 23.44 seconds, CPU usage: 43.9%, CPU core utilization: [55.3, 58.1, 24.9, 37.1] +2025-08-19 00:28:41 - INFO - [e030af98-0a2d-4526-aa94-526fc498572a] Cleaned up temporary file: temp_videos/e030af98-0a2d-4526-aa94-526fc498572a.mp4 +2025-08-19 00:28:41 - INFO - [e030af98-0a2d-4526-aa94-526fc498572a] Cleaned up temporary frame directory: temp_videos/e030af98-0a2d-4526-aa94-526fc498572a +2025-08-19 00:28:42 - INFO - [e02c4876-bd5e-4324-b7db-52b8ae09da1f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_004.mp4' +2025-08-19 00:28:42 - INFO - [e02c4876-bd5e-4324-b7db-52b8ae09da1f] Video saved to temporary file: temp_videos/e02c4876-bd5e-4324-b7db-52b8ae09da1f.mp4 +2025-08-19 00:28:42 - INFO - [e02c4876-bd5e-4324-b7db-52b8ae09da1f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:28:47 - INFO - [e02c4876-bd5e-4324-b7db-52b8ae09da1f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:28:47 - INFO - [e02c4876-bd5e-4324-b7db-52b8ae09da1f] 30 frames saved to temp_videos/e02c4876-bd5e-4324-b7db-52b8ae09da1f +2025-08-19 00:28:47 - INFO - Prompt token length: 3584 +2025-08-19 00:29:04 - INFO - Tokens per second: 43.61223216770222, Peak GPU memory MB: 9376.375 +2025-08-19 00:29:04 - INFO - [e02c4876-bd5e-4324-b7db-52b8ae09da1f] Inference time: 22.73 seconds, CPU usage: 43.6%, CPU core utilization: [57.3, 25.4, 61.0, 30.8] +2025-08-19 00:29:04 - INFO - [e02c4876-bd5e-4324-b7db-52b8ae09da1f] Cleaned up temporary file: temp_videos/e02c4876-bd5e-4324-b7db-52b8ae09da1f.mp4 +2025-08-19 00:29:04 - INFO - [e02c4876-bd5e-4324-b7db-52b8ae09da1f] Cleaned up temporary frame directory: temp_videos/e02c4876-bd5e-4324-b7db-52b8ae09da1f +2025-08-19 00:29:04 - INFO - [e4e6be02-dc47-46ed-bfb1-3e7e879cb76b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_005.mp4' +2025-08-19 00:29:04 - INFO - [e4e6be02-dc47-46ed-bfb1-3e7e879cb76b] Video saved to temporary file: temp_videos/e4e6be02-dc47-46ed-bfb1-3e7e879cb76b.mp4 +2025-08-19 00:29:04 - INFO - [e4e6be02-dc47-46ed-bfb1-3e7e879cb76b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:29:10 - INFO - [e4e6be02-dc47-46ed-bfb1-3e7e879cb76b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:29:10 - INFO - [e4e6be02-dc47-46ed-bfb1-3e7e879cb76b] 30 frames saved to temp_videos/e4e6be02-dc47-46ed-bfb1-3e7e879cb76b +2025-08-19 00:29:10 - INFO - Prompt token length: 3584 +2025-08-19 00:29:27 - INFO - Tokens per second: 43.19587492692884, Peak GPU memory MB: 9376.375 +2025-08-19 00:29:27 - INFO - [e4e6be02-dc47-46ed-bfb1-3e7e879cb76b] Inference time: 22.68 seconds, CPU usage: 44.7%, CPU core utilization: [28.8, 74.9, 47.1, 28.2] +2025-08-19 00:29:27 - INFO - [e4e6be02-dc47-46ed-bfb1-3e7e879cb76b] Cleaned up temporary file: temp_videos/e4e6be02-dc47-46ed-bfb1-3e7e879cb76b.mp4 +2025-08-19 00:29:27 - INFO - [e4e6be02-dc47-46ed-bfb1-3e7e879cb76b] Cleaned up temporary frame directory: temp_videos/e4e6be02-dc47-46ed-bfb1-3e7e879cb76b +2025-08-19 00:29:27 - INFO - [2742f50f-4a88-4492-af15-89c3771fa60a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_006.mp4' +2025-08-19 00:29:27 - INFO - [2742f50f-4a88-4492-af15-89c3771fa60a] Video saved to temporary file: temp_videos/2742f50f-4a88-4492-af15-89c3771fa60a.mp4 +2025-08-19 00:29:27 - INFO - [2742f50f-4a88-4492-af15-89c3771fa60a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:29:33 - INFO - [2742f50f-4a88-4492-af15-89c3771fa60a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:29:33 - INFO - [2742f50f-4a88-4492-af15-89c3771fa60a] 30 frames saved to temp_videos/2742f50f-4a88-4492-af15-89c3771fa60a +2025-08-19 00:29:34 - INFO - Prompt token length: 3584 +2025-08-19 00:29:51 - INFO - Tokens per second: 43.57761765477471, Peak GPU memory MB: 9376.375 +2025-08-19 00:29:51 - INFO - [2742f50f-4a88-4492-af15-89c3771fa60a] Inference time: 23.98 seconds, CPU usage: 46.4%, CPU core utilization: [30.9, 30.5, 91.3, 32.8] +2025-08-19 00:29:51 - INFO - [2742f50f-4a88-4492-af15-89c3771fa60a] Cleaned up temporary file: temp_videos/2742f50f-4a88-4492-af15-89c3771fa60a.mp4 +2025-08-19 00:29:51 - INFO - [2742f50f-4a88-4492-af15-89c3771fa60a] Cleaned up temporary frame directory: temp_videos/2742f50f-4a88-4492-af15-89c3771fa60a +2025-08-19 00:29:51 - INFO - [901b19df-4386-4313-b75e-fa7d7acefe5e] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_007.mp4' +2025-08-19 00:29:51 - INFO - [901b19df-4386-4313-b75e-fa7d7acefe5e] Video saved to temporary file: temp_videos/901b19df-4386-4313-b75e-fa7d7acefe5e.mp4 +2025-08-19 00:29:51 - INFO - [901b19df-4386-4313-b75e-fa7d7acefe5e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:29:57 - INFO - [901b19df-4386-4313-b75e-fa7d7acefe5e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:29:57 - INFO - [901b19df-4386-4313-b75e-fa7d7acefe5e] 30 frames saved to temp_videos/901b19df-4386-4313-b75e-fa7d7acefe5e +2025-08-19 00:29:57 - INFO - Prompt token length: 3584 +2025-08-19 00:30:14 - INFO - Tokens per second: 43.63718223089107, Peak GPU memory MB: 9376.375 +2025-08-19 00:30:14 - INFO - [901b19df-4386-4313-b75e-fa7d7acefe5e] Inference time: 22.93 seconds, CPU usage: 45.5%, CPU core utilization: [56.0, 51.6, 37.2, 37.0] +2025-08-19 00:30:14 - INFO - [901b19df-4386-4313-b75e-fa7d7acefe5e] Cleaned up temporary file: temp_videos/901b19df-4386-4313-b75e-fa7d7acefe5e.mp4 +2025-08-19 00:30:14 - INFO - [901b19df-4386-4313-b75e-fa7d7acefe5e] Cleaned up temporary frame directory: temp_videos/901b19df-4386-4313-b75e-fa7d7acefe5e +2025-08-19 00:30:14 - INFO - [bbc663d8-2fb3-4311-8c5c-78fc624138d2] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_008.mp4' +2025-08-19 00:30:14 - INFO - [bbc663d8-2fb3-4311-8c5c-78fc624138d2] Video saved to temporary file: temp_videos/bbc663d8-2fb3-4311-8c5c-78fc624138d2.mp4 +2025-08-19 00:30:14 - INFO - [bbc663d8-2fb3-4311-8c5c-78fc624138d2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:30:20 - INFO - [bbc663d8-2fb3-4311-8c5c-78fc624138d2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:30:20 - INFO - [bbc663d8-2fb3-4311-8c5c-78fc624138d2] 30 frames saved to temp_videos/bbc663d8-2fb3-4311-8c5c-78fc624138d2 +2025-08-19 00:30:20 - INFO - Prompt token length: 3584 +2025-08-19 00:30:37 - INFO - Tokens per second: 43.53603194535748, Peak GPU memory MB: 9376.375 +2025-08-19 00:30:37 - INFO - [bbc663d8-2fb3-4311-8c5c-78fc624138d2] Inference time: 22.86 seconds, CPU usage: 44.8%, CPU core utilization: [42.3, 28.4, 78.7, 29.8] +2025-08-19 00:30:37 - INFO - [bbc663d8-2fb3-4311-8c5c-78fc624138d2] Cleaned up temporary file: temp_videos/bbc663d8-2fb3-4311-8c5c-78fc624138d2.mp4 +2025-08-19 00:30:37 - INFO - [bbc663d8-2fb3-4311-8c5c-78fc624138d2] Cleaned up temporary frame directory: temp_videos/bbc663d8-2fb3-4311-8c5c-78fc624138d2 +2025-08-19 00:30:37 - INFO - [1c2b9ff9-bfab-4a92-aa71-35ead3adcc9f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_009.mp4' +2025-08-19 00:30:37 - INFO - [1c2b9ff9-bfab-4a92-aa71-35ead3adcc9f] Video saved to temporary file: temp_videos/1c2b9ff9-bfab-4a92-aa71-35ead3adcc9f.mp4 +2025-08-19 00:30:37 - INFO - [1c2b9ff9-bfab-4a92-aa71-35ead3adcc9f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:30:43 - INFO - [1c2b9ff9-bfab-4a92-aa71-35ead3adcc9f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:30:43 - INFO - [1c2b9ff9-bfab-4a92-aa71-35ead3adcc9f] 30 frames saved to temp_videos/1c2b9ff9-bfab-4a92-aa71-35ead3adcc9f +2025-08-19 00:30:43 - INFO - Prompt token length: 3584 +2025-08-19 00:31:00 - INFO - Tokens per second: 43.67786409828369, Peak GPU memory MB: 9376.375 +2025-08-19 00:31:00 - INFO - [1c2b9ff9-bfab-4a92-aa71-35ead3adcc9f] Inference time: 22.94 seconds, CPU usage: 45.0%, CPU core utilization: [71.8, 28.1, 41.7, 38.4] +2025-08-19 00:31:00 - INFO - [1c2b9ff9-bfab-4a92-aa71-35ead3adcc9f] Cleaned up temporary file: temp_videos/1c2b9ff9-bfab-4a92-aa71-35ead3adcc9f.mp4 +2025-08-19 00:31:00 - INFO - [1c2b9ff9-bfab-4a92-aa71-35ead3adcc9f] Cleaned up temporary frame directory: temp_videos/1c2b9ff9-bfab-4a92-aa71-35ead3adcc9f +2025-08-19 00:31:00 - INFO - [25775bfa-4c45-4918-8c9d-4ced872bb378] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_010.mp4' +2025-08-19 00:31:00 - INFO - [25775bfa-4c45-4918-8c9d-4ced872bb378] Video saved to temporary file: temp_videos/25775bfa-4c45-4918-8c9d-4ced872bb378.mp4 +2025-08-19 00:31:00 - INFO - [25775bfa-4c45-4918-8c9d-4ced872bb378] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:31:05 - INFO - [25775bfa-4c45-4918-8c9d-4ced872bb378] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:31:05 - INFO - [25775bfa-4c45-4918-8c9d-4ced872bb378] 30 frames saved to temp_videos/25775bfa-4c45-4918-8c9d-4ced872bb378 +2025-08-19 00:31:06 - INFO - Prompt token length: 3584 +2025-08-19 00:31:23 - INFO - Tokens per second: 43.654193436473435, Peak GPU memory MB: 9376.375 +2025-08-19 00:31:23 - INFO - [25775bfa-4c45-4918-8c9d-4ced872bb378] Inference time: 22.56 seconds, CPU usage: 44.2%, CPU core utilization: [27.0, 53.1, 58.2, 38.5] +2025-08-19 00:31:23 - INFO - [25775bfa-4c45-4918-8c9d-4ced872bb378] Cleaned up temporary file: temp_videos/25775bfa-4c45-4918-8c9d-4ced872bb378.mp4 +2025-08-19 00:31:23 - INFO - [25775bfa-4c45-4918-8c9d-4ced872bb378] Cleaned up temporary frame directory: temp_videos/25775bfa-4c45-4918-8c9d-4ced872bb378 +2025-08-19 00:31:23 - INFO - [5e304f41-75f5-4a5d-9047-51d1372ccb19] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_011.mp4' +2025-08-19 00:31:23 - INFO - [5e304f41-75f5-4a5d-9047-51d1372ccb19] Video saved to temporary file: temp_videos/5e304f41-75f5-4a5d-9047-51d1372ccb19.mp4 +2025-08-19 00:31:23 - INFO - [5e304f41-75f5-4a5d-9047-51d1372ccb19] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:31:28 - INFO - [5e304f41-75f5-4a5d-9047-51d1372ccb19] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:31:28 - INFO - [5e304f41-75f5-4a5d-9047-51d1372ccb19] 30 frames saved to temp_videos/5e304f41-75f5-4a5d-9047-51d1372ccb19 +2025-08-19 00:31:29 - INFO - Prompt token length: 3584 +2025-08-19 00:31:47 - INFO - Tokens per second: 42.25990172456639, Peak GPU memory MB: 9376.375 +2025-08-19 00:31:47 - INFO - [5e304f41-75f5-4a5d-9047-51d1372ccb19] Inference time: 23.76 seconds, CPU usage: 47.5%, CPU core utilization: [47.9, 37.2, 72.1, 32.9] +2025-08-19 00:31:47 - INFO - [5e304f41-75f5-4a5d-9047-51d1372ccb19] Cleaned up temporary file: temp_videos/5e304f41-75f5-4a5d-9047-51d1372ccb19.mp4 +2025-08-19 00:31:47 - INFO - [5e304f41-75f5-4a5d-9047-51d1372ccb19] Cleaned up temporary frame directory: temp_videos/5e304f41-75f5-4a5d-9047-51d1372ccb19 +2025-08-19 00:31:47 - INFO - [2764d344-a10b-4b4b-9fe2-5228895300dd] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_012.mp4' +2025-08-19 00:31:47 - INFO - [2764d344-a10b-4b4b-9fe2-5228895300dd] Video saved to temporary file: temp_videos/2764d344-a10b-4b4b-9fe2-5228895300dd.mp4 +2025-08-19 00:31:47 - INFO - [2764d344-a10b-4b4b-9fe2-5228895300dd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:31:53 - INFO - [2764d344-a10b-4b4b-9fe2-5228895300dd] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:31:53 - INFO - [2764d344-a10b-4b4b-9fe2-5228895300dd] 30 frames saved to temp_videos/2764d344-a10b-4b4b-9fe2-5228895300dd +2025-08-19 00:31:53 - INFO - Prompt token length: 3584 +2025-08-19 00:32:12 - INFO - Tokens per second: 42.53959316212018, Peak GPU memory MB: 9376.375 +2025-08-19 00:32:12 - INFO - [2764d344-a10b-4b4b-9fe2-5228895300dd] Inference time: 25.42 seconds, CPU usage: 58.0%, CPU core utilization: [49.7, 50.9, 62.3, 69.2] +2025-08-19 00:32:12 - INFO - [2764d344-a10b-4b4b-9fe2-5228895300dd] Cleaned up temporary file: temp_videos/2764d344-a10b-4b4b-9fe2-5228895300dd.mp4 +2025-08-19 00:32:12 - INFO - [2764d344-a10b-4b4b-9fe2-5228895300dd] Cleaned up temporary frame directory: temp_videos/2764d344-a10b-4b4b-9fe2-5228895300dd +2025-08-19 00:32:12 - INFO - [4610515b-7531-42e2-b291-03a984ab739a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_013.mp4' +2025-08-19 00:32:12 - INFO - [4610515b-7531-42e2-b291-03a984ab739a] Video saved to temporary file: temp_videos/4610515b-7531-42e2-b291-03a984ab739a.mp4 +2025-08-19 00:32:12 - INFO - [4610515b-7531-42e2-b291-03a984ab739a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:32:18 - INFO - [4610515b-7531-42e2-b291-03a984ab739a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:32:18 - INFO - [4610515b-7531-42e2-b291-03a984ab739a] 30 frames saved to temp_videos/4610515b-7531-42e2-b291-03a984ab739a +2025-08-19 00:32:18 - INFO - Prompt token length: 3584 +2025-08-19 00:32:35 - INFO - Tokens per second: 43.737237712906385, Peak GPU memory MB: 9376.375 +2025-08-19 00:32:35 - INFO - [4610515b-7531-42e2-b291-03a984ab739a] Inference time: 22.70 seconds, CPU usage: 45.1%, CPU core utilization: [65.1, 42.1, 39.7, 33.5] +2025-08-19 00:32:35 - INFO - [4610515b-7531-42e2-b291-03a984ab739a] Cleaned up temporary file: temp_videos/4610515b-7531-42e2-b291-03a984ab739a.mp4 +2025-08-19 00:32:35 - INFO - [4610515b-7531-42e2-b291-03a984ab739a] Cleaned up temporary frame directory: temp_videos/4610515b-7531-42e2-b291-03a984ab739a +2025-08-19 00:32:35 - INFO - [c2a9c747-c17c-4ab8-a0b4-1f86dde89f0c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_014.mp4' +2025-08-19 00:32:35 - INFO - [c2a9c747-c17c-4ab8-a0b4-1f86dde89f0c] Video saved to temporary file: temp_videos/c2a9c747-c17c-4ab8-a0b4-1f86dde89f0c.mp4 +2025-08-19 00:32:35 - INFO - [c2a9c747-c17c-4ab8-a0b4-1f86dde89f0c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:32:40 - INFO - [c2a9c747-c17c-4ab8-a0b4-1f86dde89f0c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:32:40 - INFO - [c2a9c747-c17c-4ab8-a0b4-1f86dde89f0c] 30 frames saved to temp_videos/c2a9c747-c17c-4ab8-a0b4-1f86dde89f0c +2025-08-19 00:32:40 - INFO - Prompt token length: 3584 +2025-08-19 00:32:58 - INFO - Tokens per second: 43.33501727893497, Peak GPU memory MB: 9376.375 +2025-08-19 00:32:58 - INFO - [c2a9c747-c17c-4ab8-a0b4-1f86dde89f0c] Inference time: 23.14 seconds, CPU usage: 43.8%, CPU core utilization: [66.1, 47.0, 32.0, 30.1] +2025-08-19 00:32:58 - INFO - [c2a9c747-c17c-4ab8-a0b4-1f86dde89f0c] Cleaned up temporary file: temp_videos/c2a9c747-c17c-4ab8-a0b4-1f86dde89f0c.mp4 +2025-08-19 00:32:58 - INFO - [c2a9c747-c17c-4ab8-a0b4-1f86dde89f0c] Cleaned up temporary frame directory: temp_videos/c2a9c747-c17c-4ab8-a0b4-1f86dde89f0c +2025-08-19 00:32:58 - INFO - [13279ac7-d5a2-4aaa-ba42-4a36e7dc1da4] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_015.mp4' +2025-08-19 00:32:58 - INFO - [13279ac7-d5a2-4aaa-ba42-4a36e7dc1da4] Video saved to temporary file: temp_videos/13279ac7-d5a2-4aaa-ba42-4a36e7dc1da4.mp4 +2025-08-19 00:32:58 - INFO - [13279ac7-d5a2-4aaa-ba42-4a36e7dc1da4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:33:04 - INFO - [13279ac7-d5a2-4aaa-ba42-4a36e7dc1da4] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:33:04 - INFO - [13279ac7-d5a2-4aaa-ba42-4a36e7dc1da4] 30 frames saved to temp_videos/13279ac7-d5a2-4aaa-ba42-4a36e7dc1da4 +2025-08-19 00:33:04 - INFO - Prompt token length: 3584 +2025-08-19 00:33:21 - INFO - Tokens per second: 43.71047297473334, Peak GPU memory MB: 9376.375 +2025-08-19 00:33:21 - INFO - [13279ac7-d5a2-4aaa-ba42-4a36e7dc1da4] Inference time: 22.57 seconds, CPU usage: 47.0%, CPU core utilization: [32.8, 31.1, 61.7, 62.5] +2025-08-19 00:33:21 - INFO - [13279ac7-d5a2-4aaa-ba42-4a36e7dc1da4] Cleaned up temporary file: temp_videos/13279ac7-d5a2-4aaa-ba42-4a36e7dc1da4.mp4 +2025-08-19 00:33:21 - INFO - [13279ac7-d5a2-4aaa-ba42-4a36e7dc1da4] Cleaned up temporary frame directory: temp_videos/13279ac7-d5a2-4aaa-ba42-4a36e7dc1da4 +2025-08-19 00:33:21 - INFO - [4a051ecf-867f-4bf1-8fe7-b2ee29b375b4] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_016.mp4' +2025-08-19 00:33:21 - INFO - [4a051ecf-867f-4bf1-8fe7-b2ee29b375b4] Video saved to temporary file: temp_videos/4a051ecf-867f-4bf1-8fe7-b2ee29b375b4.mp4 +2025-08-19 00:33:21 - INFO - [4a051ecf-867f-4bf1-8fe7-b2ee29b375b4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:33:26 - INFO - [4a051ecf-867f-4bf1-8fe7-b2ee29b375b4] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:33:26 - INFO - [4a051ecf-867f-4bf1-8fe7-b2ee29b375b4] 30 frames saved to temp_videos/4a051ecf-867f-4bf1-8fe7-b2ee29b375b4 +2025-08-19 00:33:27 - INFO - Prompt token length: 3584 +2025-08-19 00:33:44 - INFO - Tokens per second: 43.54078657986269, Peak GPU memory MB: 9376.375 +2025-08-19 00:33:44 - INFO - [4a051ecf-867f-4bf1-8fe7-b2ee29b375b4] Inference time: 22.97 seconds, CPU usage: 45.4%, CPU core utilization: [32.7, 73.2, 36.0, 39.6] +2025-08-19 00:33:44 - INFO - [4a051ecf-867f-4bf1-8fe7-b2ee29b375b4] Cleaned up temporary file: temp_videos/4a051ecf-867f-4bf1-8fe7-b2ee29b375b4.mp4 +2025-08-19 00:33:44 - INFO - [4a051ecf-867f-4bf1-8fe7-b2ee29b375b4] Cleaned up temporary frame directory: temp_videos/4a051ecf-867f-4bf1-8fe7-b2ee29b375b4 +2025-08-19 00:33:44 - INFO - [994fc803-baab-4dde-b569-108a0ffa0208] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_017.mp4' +2025-08-19 00:33:44 - INFO - [994fc803-baab-4dde-b569-108a0ffa0208] Video saved to temporary file: temp_videos/994fc803-baab-4dde-b569-108a0ffa0208.mp4 +2025-08-19 00:33:44 - INFO - [994fc803-baab-4dde-b569-108a0ffa0208] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:33:49 - INFO - [994fc803-baab-4dde-b569-108a0ffa0208] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:33:49 - INFO - [994fc803-baab-4dde-b569-108a0ffa0208] 30 frames saved to temp_videos/994fc803-baab-4dde-b569-108a0ffa0208 +2025-08-19 00:33:50 - INFO - Prompt token length: 3584 +2025-08-19 00:34:07 - INFO - Tokens per second: 43.53893919238898, Peak GPU memory MB: 9376.375 +2025-08-19 00:34:07 - INFO - [994fc803-baab-4dde-b569-108a0ffa0208] Inference time: 22.69 seconds, CPU usage: 45.0%, CPU core utilization: [45.9, 29.2, 74.2, 30.6] +2025-08-19 00:34:07 - INFO - [994fc803-baab-4dde-b569-108a0ffa0208] Cleaned up temporary file: temp_videos/994fc803-baab-4dde-b569-108a0ffa0208.mp4 +2025-08-19 00:34:07 - INFO - [994fc803-baab-4dde-b569-108a0ffa0208] Cleaned up temporary frame directory: temp_videos/994fc803-baab-4dde-b569-108a0ffa0208 +2025-08-19 00:34:07 - INFO - [5606584a-f2db-4dd9-9e06-23a5c7ae5a58] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_018.mp4' +2025-08-19 00:34:07 - INFO - [5606584a-f2db-4dd9-9e06-23a5c7ae5a58] Video saved to temporary file: temp_videos/5606584a-f2db-4dd9-9e06-23a5c7ae5a58.mp4 +2025-08-19 00:34:07 - INFO - [5606584a-f2db-4dd9-9e06-23a5c7ae5a58] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:34:12 - INFO - [5606584a-f2db-4dd9-9e06-23a5c7ae5a58] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:34:12 - INFO - [5606584a-f2db-4dd9-9e06-23a5c7ae5a58] 30 frames saved to temp_videos/5606584a-f2db-4dd9-9e06-23a5c7ae5a58 +2025-08-19 00:34:12 - INFO - Prompt token length: 3584 +2025-08-19 00:34:30 - INFO - Tokens per second: 42.77804507220416, Peak GPU memory MB: 9376.375 +2025-08-19 00:34:30 - INFO - [5606584a-f2db-4dd9-9e06-23a5c7ae5a58] Inference time: 23.72 seconds, CPU usage: 45.6%, CPU core utilization: [33.4, 46.8, 28.9, 73.0] +2025-08-19 00:34:30 - INFO - [5606584a-f2db-4dd9-9e06-23a5c7ae5a58] Cleaned up temporary file: temp_videos/5606584a-f2db-4dd9-9e06-23a5c7ae5a58.mp4 +2025-08-19 00:34:30 - INFO - [5606584a-f2db-4dd9-9e06-23a5c7ae5a58] Cleaned up temporary frame directory: temp_videos/5606584a-f2db-4dd9-9e06-23a5c7ae5a58 +2025-08-19 00:34:30 - INFO - [c4534a25-5702-4922-96e2-e5c790e9bc6b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_019.mp4' +2025-08-19 00:34:30 - INFO - [c4534a25-5702-4922-96e2-e5c790e9bc6b] Video saved to temporary file: temp_videos/c4534a25-5702-4922-96e2-e5c790e9bc6b.mp4 +2025-08-19 00:34:30 - INFO - [c4534a25-5702-4922-96e2-e5c790e9bc6b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:34:36 - INFO - [c4534a25-5702-4922-96e2-e5c790e9bc6b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:34:36 - INFO - [c4534a25-5702-4922-96e2-e5c790e9bc6b] 30 frames saved to temp_videos/c4534a25-5702-4922-96e2-e5c790e9bc6b +2025-08-19 00:34:36 - INFO - Prompt token length: 3584 +2025-08-19 00:34:53 - INFO - Tokens per second: 40.6841438932034, Peak GPU memory MB: 9376.375 +2025-08-19 00:34:53 - INFO - [c4534a25-5702-4922-96e2-e5c790e9bc6b] Inference time: 23.05 seconds, CPU usage: 54.3%, CPU core utilization: [46.2, 41.7, 86.8, 42.3] +2025-08-19 00:34:53 - INFO - [c4534a25-5702-4922-96e2-e5c790e9bc6b] Cleaned up temporary file: temp_videos/c4534a25-5702-4922-96e2-e5c790e9bc6b.mp4 +2025-08-19 00:34:53 - INFO - [c4534a25-5702-4922-96e2-e5c790e9bc6b] Cleaned up temporary frame directory: temp_videos/c4534a25-5702-4922-96e2-e5c790e9bc6b +2025-08-19 00:34:54 - INFO - [a6605c13-f678-4b76-823c-64c07ccfa104] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_020.mp4' +2025-08-19 00:34:54 - INFO - [a6605c13-f678-4b76-823c-64c07ccfa104] Video saved to temporary file: temp_videos/a6605c13-f678-4b76-823c-64c07ccfa104.mp4 +2025-08-19 00:34:54 - INFO - [a6605c13-f678-4b76-823c-64c07ccfa104] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:34:59 - INFO - [a6605c13-f678-4b76-823c-64c07ccfa104] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:34:59 - INFO - [a6605c13-f678-4b76-823c-64c07ccfa104] 30 frames saved to temp_videos/a6605c13-f678-4b76-823c-64c07ccfa104 +2025-08-19 00:34:59 - INFO - Prompt token length: 3584 +2025-08-19 00:35:17 - INFO - Tokens per second: 43.20499224445443, Peak GPU memory MB: 9376.375 +2025-08-19 00:35:17 - INFO - [a6605c13-f678-4b76-823c-64c07ccfa104] Inference time: 23.12 seconds, CPU usage: 49.5%, CPU core utilization: [42.4, 46.3, 74.6, 34.7] +2025-08-19 00:35:17 - INFO - [a6605c13-f678-4b76-823c-64c07ccfa104] Cleaned up temporary file: temp_videos/a6605c13-f678-4b76-823c-64c07ccfa104.mp4 +2025-08-19 00:35:17 - INFO - [a6605c13-f678-4b76-823c-64c07ccfa104] Cleaned up temporary frame directory: temp_videos/a6605c13-f678-4b76-823c-64c07ccfa104 +2025-08-19 00:35:17 - INFO - [31f98437-d0f8-4393-8ae4-6ea6c23cf010] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_021.mp4' +2025-08-19 00:35:17 - INFO - [31f98437-d0f8-4393-8ae4-6ea6c23cf010] Video saved to temporary file: temp_videos/31f98437-d0f8-4393-8ae4-6ea6c23cf010.mp4 +2025-08-19 00:35:17 - INFO - [31f98437-d0f8-4393-8ae4-6ea6c23cf010] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:35:22 - INFO - [31f98437-d0f8-4393-8ae4-6ea6c23cf010] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:35:22 - INFO - [31f98437-d0f8-4393-8ae4-6ea6c23cf010] 30 frames saved to temp_videos/31f98437-d0f8-4393-8ae4-6ea6c23cf010 +2025-08-19 00:35:22 - INFO - Prompt token length: 3584 +2025-08-19 00:35:39 - INFO - Tokens per second: 43.746940726148665, Peak GPU memory MB: 9376.375 +2025-08-19 00:35:39 - INFO - [31f98437-d0f8-4393-8ae4-6ea6c23cf010] Inference time: 22.60 seconds, CPU usage: 43.7%, CPU core utilization: [31.5, 63.2, 28.2, 51.8] +2025-08-19 00:35:39 - INFO - [31f98437-d0f8-4393-8ae4-6ea6c23cf010] Cleaned up temporary file: temp_videos/31f98437-d0f8-4393-8ae4-6ea6c23cf010.mp4 +2025-08-19 00:35:39 - INFO - [31f98437-d0f8-4393-8ae4-6ea6c23cf010] Cleaned up temporary frame directory: temp_videos/31f98437-d0f8-4393-8ae4-6ea6c23cf010 +2025-08-19 00:35:39 - INFO - [40bb760d-6921-44f3-a091-33b7835be611] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_022.mp4' +2025-08-19 00:35:39 - INFO - [40bb760d-6921-44f3-a091-33b7835be611] Video saved to temporary file: temp_videos/40bb760d-6921-44f3-a091-33b7835be611.mp4 +2025-08-19 00:35:39 - INFO - [40bb760d-6921-44f3-a091-33b7835be611] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:35:44 - INFO - [40bb760d-6921-44f3-a091-33b7835be611] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:35:44 - INFO - [40bb760d-6921-44f3-a091-33b7835be611] 30 frames saved to temp_videos/40bb760d-6921-44f3-a091-33b7835be611 +2025-08-19 00:35:45 - INFO - Prompt token length: 3584 +2025-08-19 00:36:02 - INFO - Tokens per second: 43.45687531076368, Peak GPU memory MB: 9376.375 +2025-08-19 00:36:02 - INFO - [40bb760d-6921-44f3-a091-33b7835be611] Inference time: 22.24 seconds, CPU usage: 43.8%, CPU core utilization: [31.2, 36.9, 42.2, 64.8] +2025-08-19 00:36:02 - INFO - [40bb760d-6921-44f3-a091-33b7835be611] Cleaned up temporary file: temp_videos/40bb760d-6921-44f3-a091-33b7835be611.mp4 +2025-08-19 00:36:02 - INFO - [40bb760d-6921-44f3-a091-33b7835be611] Cleaned up temporary frame directory: temp_videos/40bb760d-6921-44f3-a091-33b7835be611 +2025-08-19 00:36:02 - INFO - [aea8de20-54da-4e43-ba94-3c5bdb461925] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_023.mp4' +2025-08-19 00:36:02 - INFO - [aea8de20-54da-4e43-ba94-3c5bdb461925] Video saved to temporary file: temp_videos/aea8de20-54da-4e43-ba94-3c5bdb461925.mp4 +2025-08-19 00:36:02 - INFO - [aea8de20-54da-4e43-ba94-3c5bdb461925] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:36:06 - INFO - [aea8de20-54da-4e43-ba94-3c5bdb461925] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:36:06 - INFO - [aea8de20-54da-4e43-ba94-3c5bdb461925] 30 frames saved to temp_videos/aea8de20-54da-4e43-ba94-3c5bdb461925 +2025-08-19 00:36:07 - INFO - Prompt token length: 3584 +2025-08-19 00:36:24 - INFO - Tokens per second: 43.49020834357343, Peak GPU memory MB: 9376.375 +2025-08-19 00:36:24 - INFO - [aea8de20-54da-4e43-ba94-3c5bdb461925] Inference time: 22.04 seconds, CPU usage: 43.7%, CPU core utilization: [70.2, 24.4, 48.9, 31.3] +2025-08-19 00:36:24 - INFO - [aea8de20-54da-4e43-ba94-3c5bdb461925] Cleaned up temporary file: temp_videos/aea8de20-54da-4e43-ba94-3c5bdb461925.mp4 +2025-08-19 00:36:24 - INFO - [aea8de20-54da-4e43-ba94-3c5bdb461925] Cleaned up temporary frame directory: temp_videos/aea8de20-54da-4e43-ba94-3c5bdb461925 +2025-08-19 00:36:24 - INFO - [b6f229e3-0c87-428a-9cb0-1830d2684f26] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_024.mp4' +2025-08-19 00:36:24 - INFO - [b6f229e3-0c87-428a-9cb0-1830d2684f26] Video saved to temporary file: temp_videos/b6f229e3-0c87-428a-9cb0-1830d2684f26.mp4 +2025-08-19 00:36:24 - INFO - [b6f229e3-0c87-428a-9cb0-1830d2684f26] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:36:29 - INFO - [b6f229e3-0c87-428a-9cb0-1830d2684f26] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:36:29 - INFO - [b6f229e3-0c87-428a-9cb0-1830d2684f26] 30 frames saved to temp_videos/b6f229e3-0c87-428a-9cb0-1830d2684f26 +2025-08-19 00:36:30 - INFO - Prompt token length: 3584 +2025-08-19 00:36:47 - INFO - Tokens per second: 43.259381943491896, Peak GPU memory MB: 9376.375 +2025-08-19 00:36:47 - INFO - [b6f229e3-0c87-428a-9cb0-1830d2684f26] Inference time: 23.63 seconds, CPU usage: 44.7%, CPU core utilization: [63.8, 43.0, 41.4, 30.7] +2025-08-19 00:36:47 - INFO - [b6f229e3-0c87-428a-9cb0-1830d2684f26] Cleaned up temporary file: temp_videos/b6f229e3-0c87-428a-9cb0-1830d2684f26.mp4 +2025-08-19 00:36:47 - INFO - [b6f229e3-0c87-428a-9cb0-1830d2684f26] Cleaned up temporary frame directory: temp_videos/b6f229e3-0c87-428a-9cb0-1830d2684f26 +2025-08-19 00:36:48 - INFO - [cd77c607-1686-4879-8c10-ef8b16706cf1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_025.mp4' +2025-08-19 00:36:48 - INFO - [cd77c607-1686-4879-8c10-ef8b16706cf1] Video saved to temporary file: temp_videos/cd77c607-1686-4879-8c10-ef8b16706cf1.mp4 +2025-08-19 00:36:48 - INFO - [cd77c607-1686-4879-8c10-ef8b16706cf1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:36:53 - INFO - [cd77c607-1686-4879-8c10-ef8b16706cf1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:36:53 - INFO - [cd77c607-1686-4879-8c10-ef8b16706cf1] 30 frames saved to temp_videos/cd77c607-1686-4879-8c10-ef8b16706cf1 +2025-08-19 00:36:53 - INFO - Prompt token length: 3584 +2025-08-19 00:37:11 - INFO - Tokens per second: 42.31568205191284, Peak GPU memory MB: 9376.375 +2025-08-19 00:37:11 - INFO - [cd77c607-1686-4879-8c10-ef8b16706cf1] Inference time: 23.54 seconds, CPU usage: 47.6%, CPU core utilization: [43.2, 34.2, 80.0, 33.0] +2025-08-19 00:37:11 - INFO - [cd77c607-1686-4879-8c10-ef8b16706cf1] Cleaned up temporary file: temp_videos/cd77c607-1686-4879-8c10-ef8b16706cf1.mp4 +2025-08-19 00:37:11 - INFO - [cd77c607-1686-4879-8c10-ef8b16706cf1] Cleaned up temporary frame directory: temp_videos/cd77c607-1686-4879-8c10-ef8b16706cf1 +2025-08-19 00:37:11 - INFO - [45aedb54-179c-42c7-8684-63667aaf0ec5] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_026.mp4' +2025-08-19 00:37:11 - INFO - [45aedb54-179c-42c7-8684-63667aaf0ec5] Video saved to temporary file: temp_videos/45aedb54-179c-42c7-8684-63667aaf0ec5.mp4 +2025-08-19 00:37:11 - INFO - [45aedb54-179c-42c7-8684-63667aaf0ec5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:37:17 - INFO - [45aedb54-179c-42c7-8684-63667aaf0ec5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:37:17 - INFO - [45aedb54-179c-42c7-8684-63667aaf0ec5] 30 frames saved to temp_videos/45aedb54-179c-42c7-8684-63667aaf0ec5 +2025-08-19 00:37:18 - INFO - Prompt token length: 3584 +2025-08-19 00:37:35 - INFO - Tokens per second: 43.71294869542146, Peak GPU memory MB: 9376.375 +2025-08-19 00:37:35 - INFO - [45aedb54-179c-42c7-8684-63667aaf0ec5] Inference time: 23.51 seconds, CPU usage: 57.7%, CPU core utilization: [47.3, 49.4, 90.4, 43.8] +2025-08-19 00:37:35 - INFO - [45aedb54-179c-42c7-8684-63667aaf0ec5] Cleaned up temporary file: temp_videos/45aedb54-179c-42c7-8684-63667aaf0ec5.mp4 +2025-08-19 00:37:35 - INFO - [45aedb54-179c-42c7-8684-63667aaf0ec5] Cleaned up temporary frame directory: temp_videos/45aedb54-179c-42c7-8684-63667aaf0ec5 +2025-08-19 00:37:35 - INFO - [d6812e19-7d6f-4e25-9f25-30fc0d87b43d] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_027.mp4' +2025-08-19 00:37:35 - INFO - [d6812e19-7d6f-4e25-9f25-30fc0d87b43d] Video saved to temporary file: temp_videos/d6812e19-7d6f-4e25-9f25-30fc0d87b43d.mp4 +2025-08-19 00:37:35 - INFO - [d6812e19-7d6f-4e25-9f25-30fc0d87b43d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:37:40 - INFO - [d6812e19-7d6f-4e25-9f25-30fc0d87b43d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:37:40 - INFO - [d6812e19-7d6f-4e25-9f25-30fc0d87b43d] 30 frames saved to temp_videos/d6812e19-7d6f-4e25-9f25-30fc0d87b43d +2025-08-19 00:37:41 - INFO - Prompt token length: 3584 +2025-08-19 00:37:58 - INFO - Tokens per second: 43.47154398494139, Peak GPU memory MB: 9376.375 +2025-08-19 00:37:58 - INFO - [d6812e19-7d6f-4e25-9f25-30fc0d87b43d] Inference time: 23.51 seconds, CPU usage: 54.7%, CPU core utilization: [71.9, 45.5, 54.5, 46.9] +2025-08-19 00:37:58 - INFO - [d6812e19-7d6f-4e25-9f25-30fc0d87b43d] Cleaned up temporary file: temp_videos/d6812e19-7d6f-4e25-9f25-30fc0d87b43d.mp4 +2025-08-19 00:37:58 - INFO - [d6812e19-7d6f-4e25-9f25-30fc0d87b43d] Cleaned up temporary frame directory: temp_videos/d6812e19-7d6f-4e25-9f25-30fc0d87b43d +2025-08-19 00:37:58 - INFO - [c1ee46de-96fd-48a7-a18d-854aa9e83094] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_028.mp4' +2025-08-19 00:37:58 - INFO - [c1ee46de-96fd-48a7-a18d-854aa9e83094] Video saved to temporary file: temp_videos/c1ee46de-96fd-48a7-a18d-854aa9e83094.mp4 +2025-08-19 00:37:58 - INFO - [c1ee46de-96fd-48a7-a18d-854aa9e83094] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:38:04 - INFO - [c1ee46de-96fd-48a7-a18d-854aa9e83094] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:38:04 - INFO - [c1ee46de-96fd-48a7-a18d-854aa9e83094] 30 frames saved to temp_videos/c1ee46de-96fd-48a7-a18d-854aa9e83094 +2025-08-19 00:38:04 - INFO - Prompt token length: 3584 +2025-08-19 00:38:21 - INFO - Tokens per second: 43.76276301342597, Peak GPU memory MB: 9376.375 +2025-08-19 00:38:21 - INFO - [c1ee46de-96fd-48a7-a18d-854aa9e83094] Inference time: 22.58 seconds, CPU usage: 47.9%, CPU core utilization: [38.6, 39.2, 72.7, 41.2] +2025-08-19 00:38:21 - INFO - [c1ee46de-96fd-48a7-a18d-854aa9e83094] Cleaned up temporary file: temp_videos/c1ee46de-96fd-48a7-a18d-854aa9e83094.mp4 +2025-08-19 00:38:21 - INFO - [c1ee46de-96fd-48a7-a18d-854aa9e83094] Cleaned up temporary frame directory: temp_videos/c1ee46de-96fd-48a7-a18d-854aa9e83094 +2025-08-19 00:38:21 - INFO - [18089c8d-6e44-447a-b7f0-f1cdf82e40f6] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_029.mp4' +2025-08-19 00:38:21 - INFO - [18089c8d-6e44-447a-b7f0-f1cdf82e40f6] Video saved to temporary file: temp_videos/18089c8d-6e44-447a-b7f0-f1cdf82e40f6.mp4 +2025-08-19 00:38:21 - INFO - [18089c8d-6e44-447a-b7f0-f1cdf82e40f6] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:38:26 - INFO - [18089c8d-6e44-447a-b7f0-f1cdf82e40f6] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:38:26 - INFO - [18089c8d-6e44-447a-b7f0-f1cdf82e40f6] 30 frames saved to temp_videos/18089c8d-6e44-447a-b7f0-f1cdf82e40f6 +2025-08-19 00:38:27 - INFO - Prompt token length: 3584 +2025-08-19 00:38:44 - INFO - Tokens per second: 43.4272358854674, Peak GPU memory MB: 9376.375 +2025-08-19 00:38:44 - INFO - [18089c8d-6e44-447a-b7f0-f1cdf82e40f6] Inference time: 23.12 seconds, CPU usage: 44.1%, CPU core utilization: [26.9, 28.3, 93.0, 28.4] +2025-08-19 00:38:44 - INFO - [18089c8d-6e44-447a-b7f0-f1cdf82e40f6] Cleaned up temporary file: temp_videos/18089c8d-6e44-447a-b7f0-f1cdf82e40f6.mp4 +2025-08-19 00:38:44 - INFO - [18089c8d-6e44-447a-b7f0-f1cdf82e40f6] Cleaned up temporary frame directory: temp_videos/18089c8d-6e44-447a-b7f0-f1cdf82e40f6 +2025-08-19 00:38:44 - INFO - [d4dab866-28bd-4075-9952-69dc9f6ab262] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_030.mp4' +2025-08-19 00:38:44 - INFO - [d4dab866-28bd-4075-9952-69dc9f6ab262] Video saved to temporary file: temp_videos/d4dab866-28bd-4075-9952-69dc9f6ab262.mp4 +2025-08-19 00:38:44 - INFO - [d4dab866-28bd-4075-9952-69dc9f6ab262] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:38:51 - INFO - [d4dab866-28bd-4075-9952-69dc9f6ab262] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:38:51 - INFO - [d4dab866-28bd-4075-9952-69dc9f6ab262] 30 frames saved to temp_videos/d4dab866-28bd-4075-9952-69dc9f6ab262 +2025-08-19 00:38:51 - INFO - Prompt token length: 3584 +2025-08-19 00:39:08 - INFO - Tokens per second: 43.72120414399684, Peak GPU memory MB: 9376.375 +2025-08-19 00:39:08 - INFO - [d4dab866-28bd-4075-9952-69dc9f6ab262] Inference time: 23.72 seconds, CPU usage: 47.6%, CPU core utilization: [33.4, 49.4, 32.4, 75.2] +2025-08-19 00:39:08 - INFO - [d4dab866-28bd-4075-9952-69dc9f6ab262] Cleaned up temporary file: temp_videos/d4dab866-28bd-4075-9952-69dc9f6ab262.mp4 +2025-08-19 00:39:08 - INFO - [d4dab866-28bd-4075-9952-69dc9f6ab262] Cleaned up temporary frame directory: temp_videos/d4dab866-28bd-4075-9952-69dc9f6ab262 +2025-08-19 00:39:08 - INFO - [70021715-c064-4359-ae0b-a826669fca48] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_031.mp4' +2025-08-19 00:39:08 - INFO - [70021715-c064-4359-ae0b-a826669fca48] Video saved to temporary file: temp_videos/70021715-c064-4359-ae0b-a826669fca48.mp4 +2025-08-19 00:39:08 - INFO - [70021715-c064-4359-ae0b-a826669fca48] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:39:14 - INFO - [70021715-c064-4359-ae0b-a826669fca48] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:39:14 - INFO - [70021715-c064-4359-ae0b-a826669fca48] 30 frames saved to temp_videos/70021715-c064-4359-ae0b-a826669fca48 +2025-08-19 00:39:15 - INFO - Prompt token length: 3584 +2025-08-19 00:39:32 - INFO - Tokens per second: 43.42996315564404, Peak GPU memory MB: 9376.375 +2025-08-19 00:39:32 - INFO - [70021715-c064-4359-ae0b-a826669fca48] Inference time: 23.88 seconds, CPU usage: 46.2%, CPU core utilization: [38.6, 73.1, 26.1, 46.8] +2025-08-19 00:39:32 - INFO - [70021715-c064-4359-ae0b-a826669fca48] Cleaned up temporary file: temp_videos/70021715-c064-4359-ae0b-a826669fca48.mp4 +2025-08-19 00:39:32 - INFO - [70021715-c064-4359-ae0b-a826669fca48] Cleaned up temporary frame directory: temp_videos/70021715-c064-4359-ae0b-a826669fca48 +2025-08-19 00:39:32 - INFO - [35a5f9f3-77e1-40ec-92e4-d8d9e89b5dd1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_032.mp4' +2025-08-19 00:39:32 - INFO - [35a5f9f3-77e1-40ec-92e4-d8d9e89b5dd1] Video saved to temporary file: temp_videos/35a5f9f3-77e1-40ec-92e4-d8d9e89b5dd1.mp4 +2025-08-19 00:39:32 - INFO - [35a5f9f3-77e1-40ec-92e4-d8d9e89b5dd1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:39:38 - INFO - [35a5f9f3-77e1-40ec-92e4-d8d9e89b5dd1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:39:38 - INFO - [35a5f9f3-77e1-40ec-92e4-d8d9e89b5dd1] 30 frames saved to temp_videos/35a5f9f3-77e1-40ec-92e4-d8d9e89b5dd1 +2025-08-19 00:39:38 - INFO - Prompt token length: 3584 +2025-08-19 00:39:55 - INFO - Tokens per second: 43.38448764784339, Peak GPU memory MB: 9376.375 +2025-08-19 00:39:55 - INFO - [35a5f9f3-77e1-40ec-92e4-d8d9e89b5dd1] Inference time: 23.33 seconds, CPU usage: 46.9%, CPU core utilization: [29.8, 31.7, 94.3, 31.8] +2025-08-19 00:39:55 - INFO - [35a5f9f3-77e1-40ec-92e4-d8d9e89b5dd1] Cleaned up temporary file: temp_videos/35a5f9f3-77e1-40ec-92e4-d8d9e89b5dd1.mp4 +2025-08-19 00:39:55 - INFO - [35a5f9f3-77e1-40ec-92e4-d8d9e89b5dd1] Cleaned up temporary frame directory: temp_videos/35a5f9f3-77e1-40ec-92e4-d8d9e89b5dd1 +2025-08-19 00:39:55 - INFO - [2ab9839d-d3f4-431e-a07e-52d5448a43b5] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_033.mp4' +2025-08-19 00:39:55 - INFO - [2ab9839d-d3f4-431e-a07e-52d5448a43b5] Video saved to temporary file: temp_videos/2ab9839d-d3f4-431e-a07e-52d5448a43b5.mp4 +2025-08-19 00:39:55 - INFO - [2ab9839d-d3f4-431e-a07e-52d5448a43b5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:40:00 - INFO - [2ab9839d-d3f4-431e-a07e-52d5448a43b5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:40:00 - INFO - [2ab9839d-d3f4-431e-a07e-52d5448a43b5] 30 frames saved to temp_videos/2ab9839d-d3f4-431e-a07e-52d5448a43b5 +2025-08-19 00:40:01 - INFO - Prompt token length: 3584 +2025-08-19 00:40:18 - INFO - Tokens per second: 43.41332866806552, Peak GPU memory MB: 9376.375 +2025-08-19 00:40:18 - INFO - [2ab9839d-d3f4-431e-a07e-52d5448a43b5] Inference time: 22.96 seconds, CPU usage: 43.6%, CPU core utilization: [32.1, 36.5, 63.5, 42.5] +2025-08-19 00:40:18 - INFO - [2ab9839d-d3f4-431e-a07e-52d5448a43b5] Cleaned up temporary file: temp_videos/2ab9839d-d3f4-431e-a07e-52d5448a43b5.mp4 +2025-08-19 00:40:18 - INFO - [2ab9839d-d3f4-431e-a07e-52d5448a43b5] Cleaned up temporary frame directory: temp_videos/2ab9839d-d3f4-431e-a07e-52d5448a43b5 +2025-08-19 00:40:18 - INFO - [f45424b5-3d03-4ee0-bddd-025068566a75] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_034.mp4' +2025-08-19 00:40:18 - INFO - [f45424b5-3d03-4ee0-bddd-025068566a75] Video saved to temporary file: temp_videos/f45424b5-3d03-4ee0-bddd-025068566a75.mp4 +2025-08-19 00:40:18 - INFO - [f45424b5-3d03-4ee0-bddd-025068566a75] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:40:23 - INFO - [f45424b5-3d03-4ee0-bddd-025068566a75] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:40:23 - INFO - [f45424b5-3d03-4ee0-bddd-025068566a75] 30 frames saved to temp_videos/f45424b5-3d03-4ee0-bddd-025068566a75 +2025-08-19 00:40:24 - INFO - Prompt token length: 3584 +2025-08-19 00:40:40 - INFO - Tokens per second: 43.954712321925356, Peak GPU memory MB: 9376.375 +2025-08-19 00:40:40 - INFO - [f45424b5-3d03-4ee0-bddd-025068566a75] Inference time: 21.96 seconds, CPU usage: 43.5%, CPU core utilization: [62.0, 32.9, 56.9, 22.4] +2025-08-19 00:40:40 - INFO - [f45424b5-3d03-4ee0-bddd-025068566a75] Cleaned up temporary file: temp_videos/f45424b5-3d03-4ee0-bddd-025068566a75.mp4 +2025-08-19 00:40:40 - INFO - [f45424b5-3d03-4ee0-bddd-025068566a75] Cleaned up temporary frame directory: temp_videos/f45424b5-3d03-4ee0-bddd-025068566a75 +2025-08-19 00:40:40 - INFO - [23805fa4-3d69-48c1-bccb-1b0781850b9d] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_035.mp4' +2025-08-19 00:40:40 - INFO - [23805fa4-3d69-48c1-bccb-1b0781850b9d] Video saved to temporary file: temp_videos/23805fa4-3d69-48c1-bccb-1b0781850b9d.mp4 +2025-08-19 00:40:40 - INFO - [23805fa4-3d69-48c1-bccb-1b0781850b9d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:40:47 - INFO - [23805fa4-3d69-48c1-bccb-1b0781850b9d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:40:47 - INFO - [23805fa4-3d69-48c1-bccb-1b0781850b9d] 30 frames saved to temp_videos/23805fa4-3d69-48c1-bccb-1b0781850b9d +2025-08-19 00:40:47 - INFO - Prompt token length: 3584 +2025-08-19 00:41:05 - INFO - Tokens per second: 43.59995476967648, Peak GPU memory MB: 9376.375 +2025-08-19 00:41:05 - INFO - [23805fa4-3d69-48c1-bccb-1b0781850b9d] Inference time: 24.15 seconds, CPU usage: 46.1%, CPU core utilization: [35.3, 75.6, 34.6, 38.8] +2025-08-19 00:41:05 - INFO - [23805fa4-3d69-48c1-bccb-1b0781850b9d] Cleaned up temporary file: temp_videos/23805fa4-3d69-48c1-bccb-1b0781850b9d.mp4 +2025-08-19 00:41:05 - INFO - [23805fa4-3d69-48c1-bccb-1b0781850b9d] Cleaned up temporary frame directory: temp_videos/23805fa4-3d69-48c1-bccb-1b0781850b9d +2025-08-19 00:41:05 - INFO - [c253da32-cb7f-4834-9baa-231a1356b50d] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_036.mp4' +2025-08-19 00:41:05 - INFO - [c253da32-cb7f-4834-9baa-231a1356b50d] Video saved to temporary file: temp_videos/c253da32-cb7f-4834-9baa-231a1356b50d.mp4 +2025-08-19 00:41:05 - INFO - [c253da32-cb7f-4834-9baa-231a1356b50d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:41:10 - INFO - [c253da32-cb7f-4834-9baa-231a1356b50d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:41:10 - INFO - [c253da32-cb7f-4834-9baa-231a1356b50d] 30 frames saved to temp_videos/c253da32-cb7f-4834-9baa-231a1356b50d +2025-08-19 00:41:10 - INFO - Prompt token length: 3584 +2025-08-19 00:41:28 - INFO - Tokens per second: 43.36332420549335, Peak GPU memory MB: 9376.375 +2025-08-19 00:41:28 - INFO - [c253da32-cb7f-4834-9baa-231a1356b50d] Inference time: 23.62 seconds, CPU usage: 43.5%, CPU core utilization: [24.5, 43.3, 30.1, 76.2] +2025-08-19 00:41:28 - INFO - [c253da32-cb7f-4834-9baa-231a1356b50d] Cleaned up temporary file: temp_videos/c253da32-cb7f-4834-9baa-231a1356b50d.mp4 +2025-08-19 00:41:28 - INFO - [c253da32-cb7f-4834-9baa-231a1356b50d] Cleaned up temporary frame directory: temp_videos/c253da32-cb7f-4834-9baa-231a1356b50d +2025-08-19 00:41:28 - INFO - [d3785473-5b3d-46f0-96c8-691bf4b8ae78] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_037.mp4' +2025-08-19 00:41:28 - INFO - [d3785473-5b3d-46f0-96c8-691bf4b8ae78] Video saved to temporary file: temp_videos/d3785473-5b3d-46f0-96c8-691bf4b8ae78.mp4 +2025-08-19 00:41:28 - INFO - [d3785473-5b3d-46f0-96c8-691bf4b8ae78] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:41:33 - INFO - [d3785473-5b3d-46f0-96c8-691bf4b8ae78] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:41:33 - INFO - [d3785473-5b3d-46f0-96c8-691bf4b8ae78] 30 frames saved to temp_videos/d3785473-5b3d-46f0-96c8-691bf4b8ae78 +2025-08-19 00:41:34 - INFO - Prompt token length: 3584 +2025-08-19 00:41:51 - INFO - Tokens per second: 42.54911962641917, Peak GPU memory MB: 9376.375 +2025-08-19 00:41:51 - INFO - [d3785473-5b3d-46f0-96c8-691bf4b8ae78] Inference time: 22.73 seconds, CPU usage: 46.7%, CPU core utilization: [36.6, 67.4, 38.1, 44.6] +2025-08-19 00:41:51 - INFO - [d3785473-5b3d-46f0-96c8-691bf4b8ae78] Cleaned up temporary file: temp_videos/d3785473-5b3d-46f0-96c8-691bf4b8ae78.mp4 +2025-08-19 00:41:51 - INFO - [d3785473-5b3d-46f0-96c8-691bf4b8ae78] Cleaned up temporary frame directory: temp_videos/d3785473-5b3d-46f0-96c8-691bf4b8ae78 +2025-08-19 00:41:51 - INFO - [8322c82d-c36d-4a3f-bd2b-e3c8828fedfd] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_038.mp4' +2025-08-19 00:41:51 - INFO - [8322c82d-c36d-4a3f-bd2b-e3c8828fedfd] Video saved to temporary file: temp_videos/8322c82d-c36d-4a3f-bd2b-e3c8828fedfd.mp4 +2025-08-19 00:41:51 - INFO - [8322c82d-c36d-4a3f-bd2b-e3c8828fedfd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:41:56 - INFO - [8322c82d-c36d-4a3f-bd2b-e3c8828fedfd] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:41:56 - INFO - [8322c82d-c36d-4a3f-bd2b-e3c8828fedfd] 30 frames saved to temp_videos/8322c82d-c36d-4a3f-bd2b-e3c8828fedfd +2025-08-19 00:41:56 - INFO - Prompt token length: 3584 +2025-08-19 00:42:13 - INFO - Tokens per second: 43.650739641437625, Peak GPU memory MB: 9376.375 +2025-08-19 00:42:13 - INFO - [8322c82d-c36d-4a3f-bd2b-e3c8828fedfd] Inference time: 21.58 seconds, CPU usage: 43.1%, CPU core utilization: [30.0, 28.1, 88.6, 25.6] +2025-08-19 00:42:13 - INFO - [8322c82d-c36d-4a3f-bd2b-e3c8828fedfd] Cleaned up temporary file: temp_videos/8322c82d-c36d-4a3f-bd2b-e3c8828fedfd.mp4 +2025-08-19 00:42:13 - INFO - [8322c82d-c36d-4a3f-bd2b-e3c8828fedfd] Cleaned up temporary frame directory: temp_videos/8322c82d-c36d-4a3f-bd2b-e3c8828fedfd +2025-08-19 00:42:13 - INFO - [b886d052-b61d-4244-9f96-8935f4af6941] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_039.mp4' +2025-08-19 00:42:13 - INFO - [b886d052-b61d-4244-9f96-8935f4af6941] Video saved to temporary file: temp_videos/b886d052-b61d-4244-9f96-8935f4af6941.mp4 +2025-08-19 00:42:13 - INFO - [b886d052-b61d-4244-9f96-8935f4af6941] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:42:18 - INFO - [b886d052-b61d-4244-9f96-8935f4af6941] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:42:18 - INFO - [b886d052-b61d-4244-9f96-8935f4af6941] 30 frames saved to temp_videos/b886d052-b61d-4244-9f96-8935f4af6941 +2025-08-19 00:42:19 - INFO - Prompt token length: 3584 +2025-08-19 00:42:35 - INFO - Tokens per second: 43.762603428053495, Peak GPU memory MB: 9376.375 +2025-08-19 00:42:35 - INFO - [b886d052-b61d-4244-9f96-8935f4af6941] Inference time: 22.65 seconds, CPU usage: 45.5%, CPU core utilization: [33.0, 49.0, 26.4, 73.2] +2025-08-19 00:42:35 - INFO - [b886d052-b61d-4244-9f96-8935f4af6941] Cleaned up temporary file: temp_videos/b886d052-b61d-4244-9f96-8935f4af6941.mp4 +2025-08-19 00:42:35 - INFO - [b886d052-b61d-4244-9f96-8935f4af6941] Cleaned up temporary frame directory: temp_videos/b886d052-b61d-4244-9f96-8935f4af6941 +2025-08-19 00:42:36 - INFO - [e180bc76-082a-4587-ad91-13a71b0596d1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_040.mp4' +2025-08-19 00:42:36 - INFO - [e180bc76-082a-4587-ad91-13a71b0596d1] Video saved to temporary file: temp_videos/e180bc76-082a-4587-ad91-13a71b0596d1.mp4 +2025-08-19 00:42:36 - INFO - [e180bc76-082a-4587-ad91-13a71b0596d1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:42:42 - INFO - [e180bc76-082a-4587-ad91-13a71b0596d1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:42:42 - INFO - [e180bc76-082a-4587-ad91-13a71b0596d1] 30 frames saved to temp_videos/e180bc76-082a-4587-ad91-13a71b0596d1 +2025-08-19 00:42:42 - INFO - Prompt token length: 3584 +2025-08-19 00:42:59 - INFO - Tokens per second: 43.78116935254593, Peak GPU memory MB: 9376.375 +2025-08-19 00:42:59 - INFO - [e180bc76-082a-4587-ad91-13a71b0596d1] Inference time: 23.80 seconds, CPU usage: 46.5%, CPU core utilization: [34.9, 50.4, 61.9, 38.7] +2025-08-19 00:42:59 - INFO - [e180bc76-082a-4587-ad91-13a71b0596d1] Cleaned up temporary file: temp_videos/e180bc76-082a-4587-ad91-13a71b0596d1.mp4 +2025-08-19 00:42:59 - INFO - [e180bc76-082a-4587-ad91-13a71b0596d1] Cleaned up temporary frame directory: temp_videos/e180bc76-082a-4587-ad91-13a71b0596d1 +2025-08-19 00:42:59 - INFO - [fa640386-4f6e-4fd4-aafd-b8129483b8f7] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_041.mp4' +2025-08-19 00:42:59 - INFO - [fa640386-4f6e-4fd4-aafd-b8129483b8f7] Video saved to temporary file: temp_videos/fa640386-4f6e-4fd4-aafd-b8129483b8f7.mp4 +2025-08-19 00:42:59 - INFO - [fa640386-4f6e-4fd4-aafd-b8129483b8f7] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:43:04 - INFO - [fa640386-4f6e-4fd4-aafd-b8129483b8f7] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:43:04 - INFO - [fa640386-4f6e-4fd4-aafd-b8129483b8f7] 30 frames saved to temp_videos/fa640386-4f6e-4fd4-aafd-b8129483b8f7 +2025-08-19 00:43:04 - INFO - Prompt token length: 3584 +2025-08-19 00:43:22 - INFO - Tokens per second: 43.213784548431164, Peak GPU memory MB: 9376.375 +2025-08-19 00:43:22 - INFO - [fa640386-4f6e-4fd4-aafd-b8129483b8f7] Inference time: 22.23 seconds, CPU usage: 43.9%, CPU core utilization: [26.1, 34.8, 40.3, 74.4] +2025-08-19 00:43:22 - INFO - [fa640386-4f6e-4fd4-aafd-b8129483b8f7] Cleaned up temporary file: temp_videos/fa640386-4f6e-4fd4-aafd-b8129483b8f7.mp4 +2025-08-19 00:43:22 - INFO - [fa640386-4f6e-4fd4-aafd-b8129483b8f7] Cleaned up temporary frame directory: temp_videos/fa640386-4f6e-4fd4-aafd-b8129483b8f7 +2025-08-19 00:43:22 - INFO - [6fe45fc4-d0ec-472e-83ac-29ad2bf06f6d] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_042.mp4' +2025-08-19 00:43:22 - INFO - [6fe45fc4-d0ec-472e-83ac-29ad2bf06f6d] Video saved to temporary file: temp_videos/6fe45fc4-d0ec-472e-83ac-29ad2bf06f6d.mp4 +2025-08-19 00:43:22 - INFO - [6fe45fc4-d0ec-472e-83ac-29ad2bf06f6d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:43:27 - INFO - [6fe45fc4-d0ec-472e-83ac-29ad2bf06f6d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:43:27 - INFO - [6fe45fc4-d0ec-472e-83ac-29ad2bf06f6d] 30 frames saved to temp_videos/6fe45fc4-d0ec-472e-83ac-29ad2bf06f6d +2025-08-19 00:43:27 - INFO - Prompt token length: 3584 +2025-08-19 00:43:46 - INFO - Tokens per second: 42.19237599556414, Peak GPU memory MB: 9376.375 +2025-08-19 00:43:46 - INFO - [6fe45fc4-d0ec-472e-83ac-29ad2bf06f6d] Inference time: 23.83 seconds, CPU usage: 55.3%, CPU core utilization: [45.5, 44.3, 48.7, 82.7] +2025-08-19 00:43:46 - INFO - [6fe45fc4-d0ec-472e-83ac-29ad2bf06f6d] Cleaned up temporary file: temp_videos/6fe45fc4-d0ec-472e-83ac-29ad2bf06f6d.mp4 +2025-08-19 00:43:46 - INFO - [6fe45fc4-d0ec-472e-83ac-29ad2bf06f6d] Cleaned up temporary frame directory: temp_videos/6fe45fc4-d0ec-472e-83ac-29ad2bf06f6d +2025-08-19 00:43:46 - INFO - [cdcd33de-10be-4a1d-9550-e83451e06027] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_043.mp4' +2025-08-19 00:43:46 - INFO - [cdcd33de-10be-4a1d-9550-e83451e06027] Video saved to temporary file: temp_videos/cdcd33de-10be-4a1d-9550-e83451e06027.mp4 +2025-08-19 00:43:46 - INFO - [cdcd33de-10be-4a1d-9550-e83451e06027] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:43:52 - INFO - [cdcd33de-10be-4a1d-9550-e83451e06027] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:43:52 - INFO - [cdcd33de-10be-4a1d-9550-e83451e06027] 30 frames saved to temp_videos/cdcd33de-10be-4a1d-9550-e83451e06027 +2025-08-19 00:43:52 - INFO - Prompt token length: 3584 +2025-08-19 00:44:10 - INFO - Tokens per second: 43.38041166364092, Peak GPU memory MB: 9376.375 +2025-08-19 00:44:10 - INFO - [cdcd33de-10be-4a1d-9550-e83451e06027] Inference time: 24.08 seconds, CPU usage: 46.5%, CPU core utilization: [60.8, 28.2, 62.6, 34.5] +2025-08-19 00:44:10 - INFO - [cdcd33de-10be-4a1d-9550-e83451e06027] Cleaned up temporary file: temp_videos/cdcd33de-10be-4a1d-9550-e83451e06027.mp4 +2025-08-19 00:44:10 - INFO - [cdcd33de-10be-4a1d-9550-e83451e06027] Cleaned up temporary frame directory: temp_videos/cdcd33de-10be-4a1d-9550-e83451e06027 +2025-08-19 00:44:10 - INFO - [b2916ada-aec1-4ed7-84f8-8457cb16b7fa] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_044.mp4' +2025-08-19 00:44:10 - INFO - [b2916ada-aec1-4ed7-84f8-8457cb16b7fa] Video saved to temporary file: temp_videos/b2916ada-aec1-4ed7-84f8-8457cb16b7fa.mp4 +2025-08-19 00:44:10 - INFO - [b2916ada-aec1-4ed7-84f8-8457cb16b7fa] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:44:16 - INFO - [b2916ada-aec1-4ed7-84f8-8457cb16b7fa] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:44:16 - INFO - [b2916ada-aec1-4ed7-84f8-8457cb16b7fa] 30 frames saved to temp_videos/b2916ada-aec1-4ed7-84f8-8457cb16b7fa +2025-08-19 00:44:16 - INFO - Prompt token length: 3584 +2025-08-19 00:44:33 - INFO - Tokens per second: 43.951395848409305, Peak GPU memory MB: 9376.375 +2025-08-19 00:44:33 - INFO - [b2916ada-aec1-4ed7-84f8-8457cb16b7fa] Inference time: 22.81 seconds, CPU usage: 47.3%, CPU core utilization: [63.5, 34.4, 59.6, 31.7] +2025-08-19 00:44:33 - INFO - [b2916ada-aec1-4ed7-84f8-8457cb16b7fa] Cleaned up temporary file: temp_videos/b2916ada-aec1-4ed7-84f8-8457cb16b7fa.mp4 +2025-08-19 00:44:33 - INFO - [b2916ada-aec1-4ed7-84f8-8457cb16b7fa] Cleaned up temporary frame directory: temp_videos/b2916ada-aec1-4ed7-84f8-8457cb16b7fa +2025-08-19 00:44:33 - INFO - [6759aec4-72cd-4dc1-831c-28fcae89d503] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_045.mp4' +2025-08-19 00:44:33 - INFO - [6759aec4-72cd-4dc1-831c-28fcae89d503] Video saved to temporary file: temp_videos/6759aec4-72cd-4dc1-831c-28fcae89d503.mp4 +2025-08-19 00:44:33 - INFO - [6759aec4-72cd-4dc1-831c-28fcae89d503] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:44:37 - INFO - [6759aec4-72cd-4dc1-831c-28fcae89d503] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:44:37 - INFO - [6759aec4-72cd-4dc1-831c-28fcae89d503] 30 frames saved to temp_videos/6759aec4-72cd-4dc1-831c-28fcae89d503 +2025-08-19 00:44:38 - INFO - Prompt token length: 3584 +2025-08-19 00:44:55 - INFO - Tokens per second: 43.39903419831942, Peak GPU memory MB: 9376.375 +2025-08-19 00:44:55 - INFO - [6759aec4-72cd-4dc1-831c-28fcae89d503] Inference time: 22.50 seconds, CPU usage: 43.5%, CPU core utilization: [27.2, 22.6, 27.1, 97.1] +2025-08-19 00:44:55 - INFO - [6759aec4-72cd-4dc1-831c-28fcae89d503] Cleaned up temporary file: temp_videos/6759aec4-72cd-4dc1-831c-28fcae89d503.mp4 +2025-08-19 00:44:55 - INFO - [6759aec4-72cd-4dc1-831c-28fcae89d503] Cleaned up temporary frame directory: temp_videos/6759aec4-72cd-4dc1-831c-28fcae89d503 +2025-08-19 00:44:55 - INFO - [a0ca1f0f-f82b-44c2-bfbc-be86ba0e18da] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_046.mp4' +2025-08-19 00:44:55 - INFO - [a0ca1f0f-f82b-44c2-bfbc-be86ba0e18da] Video saved to temporary file: temp_videos/a0ca1f0f-f82b-44c2-bfbc-be86ba0e18da.mp4 +2025-08-19 00:44:55 - INFO - [a0ca1f0f-f82b-44c2-bfbc-be86ba0e18da] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:45:00 - INFO - [a0ca1f0f-f82b-44c2-bfbc-be86ba0e18da] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:45:00 - INFO - [a0ca1f0f-f82b-44c2-bfbc-be86ba0e18da] 30 frames saved to temp_videos/a0ca1f0f-f82b-44c2-bfbc-be86ba0e18da +2025-08-19 00:45:00 - INFO - Prompt token length: 3584 +2025-08-19 00:45:17 - INFO - Tokens per second: 43.76197273361684, Peak GPU memory MB: 9376.375 +2025-08-19 00:45:17 - INFO - [a0ca1f0f-f82b-44c2-bfbc-be86ba0e18da] Inference time: 21.47 seconds, CPU usage: 43.3%, CPU core utilization: [51.7, 26.1, 50.6, 44.9] +2025-08-19 00:45:17 - INFO - [a0ca1f0f-f82b-44c2-bfbc-be86ba0e18da] Cleaned up temporary file: temp_videos/a0ca1f0f-f82b-44c2-bfbc-be86ba0e18da.mp4 +2025-08-19 00:45:17 - INFO - [a0ca1f0f-f82b-44c2-bfbc-be86ba0e18da] Cleaned up temporary frame directory: temp_videos/a0ca1f0f-f82b-44c2-bfbc-be86ba0e18da +2025-08-19 00:45:17 - INFO - [ea05a775-b069-44e6-ac69-0168df8ce6b9] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_047.mp4' +2025-08-19 00:45:17 - INFO - [ea05a775-b069-44e6-ac69-0168df8ce6b9] Video saved to temporary file: temp_videos/ea05a775-b069-44e6-ac69-0168df8ce6b9.mp4 +2025-08-19 00:45:17 - INFO - [ea05a775-b069-44e6-ac69-0168df8ce6b9] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:45:19 - INFO - [ea05a775-b069-44e6-ac69-0168df8ce6b9] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:45:19 - INFO - [ea05a775-b069-44e6-ac69-0168df8ce6b9] 30 frames saved to temp_videos/ea05a775-b069-44e6-ac69-0168df8ce6b9 +2025-08-19 00:45:20 - INFO - Prompt token length: 3584 +2025-08-19 00:45:38 - INFO - Tokens per second: 43.197034762295125, Peak GPU memory MB: 9376.375 +2025-08-19 00:45:38 - INFO - [ea05a775-b069-44e6-ac69-0168df8ce6b9] Inference time: 21.49 seconds, CPU usage: 36.9%, CPU core utilization: [16.8, 20.4, 20.7, 89.7] +2025-08-19 00:45:38 - INFO - [ea05a775-b069-44e6-ac69-0168df8ce6b9] Cleaned up temporary file: temp_videos/ea05a775-b069-44e6-ac69-0168df8ce6b9.mp4 +2025-08-19 00:45:38 - INFO - [ea05a775-b069-44e6-ac69-0168df8ce6b9] Cleaned up temporary frame directory: temp_videos/ea05a775-b069-44e6-ac69-0168df8ce6b9 diff --git a/API_Transformers/logs/LFM2-VL-1.6B/20250820_215936.log b/API_Transformers/logs/LFM2-VL-1.6B/20250820_215936.log new file mode 100644 index 0000000000000000000000000000000000000000..c1984275304c9881c03d2b6cf215633cd00defd4 --- /dev/null +++ b/API_Transformers/logs/LFM2-VL-1.6B/20250820_215936.log @@ -0,0 +1,44 @@ +2025-08-20 21:59:36 - INFO - Loading model: LiquidAI/LFM2-VL-1.6B +2025-08-20 21:59:37 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-20 22:00:07 - INFO - Model loaded in 31.10 seconds +2025-08-20 22:00:07 - INFO - GPU Memory Usage after model load: 3023.64 MB +2025-08-20 22:07:37 - INFO - [5d6ff8cd-5dd3-4c3f-bb14-6bb5a2d49aa0] Received new video inference request. 
Prompt: '视频里发生了什么?', Video: 'sample_part_001.mp4' +2025-08-20 22:07:37 - INFO - [5d6ff8cd-5dd3-4c3f-bb14-6bb5a2d49aa0] Video saved to temporary file: temp_videos/5d6ff8cd-5dd3-4c3f-bb14-6bb5a2d49aa0.mp4 +2025-08-20 22:07:37 - INFO - [5d6ff8cd-5dd3-4c3f-bb14-6bb5a2d49aa0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:07:43 - INFO - [5d6ff8cd-5dd3-4c3f-bb14-6bb5a2d49aa0] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:07:43 - INFO - [5d6ff8cd-5dd3-4c3f-bb14-6bb5a2d49aa0] 30 frames saved to temp_videos/5d6ff8cd-5dd3-4c3f-bb14-6bb5a2d49aa0 +2025-08-20 22:07:44 - INFO - Prompt token length: 3585 +2025-08-20 22:08:04 - INFO - Tokens per second: 40.26488449689874, Peak GPU memory MB: 9378.375 +2025-08-20 22:08:04 - INFO - [5d6ff8cd-5dd3-4c3f-bb14-6bb5a2d49aa0] Inference time: 27.25 seconds, CPU usage: 31.0%, CPU core utilization: [30.5, 30.3, 31.0, 32.3] +2025-08-20 22:08:04 - INFO - [5d6ff8cd-5dd3-4c3f-bb14-6bb5a2d49aa0] Cleaned up temporary file: temp_videos/5d6ff8cd-5dd3-4c3f-bb14-6bb5a2d49aa0.mp4 +2025-08-20 22:08:04 - INFO - [5d6ff8cd-5dd3-4c3f-bb14-6bb5a2d49aa0] Cleaned up temporary frame directory: temp_videos/5d6ff8cd-5dd3-4c3f-bb14-6bb5a2d49aa0 +2025-08-20 22:08:04 - INFO - [7d334c4a-5ec5-47d1-9af6-5134d7017e7c] Received new video inference request. Prompt: '视频里发生了什么?', Video: 'sample_part_001.mp4' +2025-08-20 22:08:04 - INFO - [7d334c4a-5ec5-47d1-9af6-5134d7017e7c] Video saved to temporary file: temp_videos/7d334c4a-5ec5-47d1-9af6-5134d7017e7c.mp4 +2025-08-20 22:08:04 - INFO - [7d334c4a-5ec5-47d1-9af6-5134d7017e7c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:08:12 - INFO - [7d334c4a-5ec5-47d1-9af6-5134d7017e7c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 22:08:12 - INFO - [7d334c4a-5ec5-47d1-9af6-5134d7017e7c] 30 frames saved to temp_videos/7d334c4a-5ec5-47d1-9af6-5134d7017e7c +2025-08-20 22:08:13 - INFO - Prompt token length: 3585 +2025-08-20 22:08:32 - INFO - Tokens per second: 41.8782758123788, Peak GPU memory MB: 9378.375 +2025-08-20 22:08:32 - INFO - [7d334c4a-5ec5-47d1-9af6-5134d7017e7c] Inference time: 27.77 seconds, CPU usage: 76.0%, CPU core utilization: [65.2, 79.8, 85.9, 73.3] +2025-08-20 22:08:32 - INFO - [7d334c4a-5ec5-47d1-9af6-5134d7017e7c] Cleaned up temporary file: temp_videos/7d334c4a-5ec5-47d1-9af6-5134d7017e7c.mp4 +2025-08-20 22:08:32 - INFO - [7d334c4a-5ec5-47d1-9af6-5134d7017e7c] Cleaned up temporary frame directory: temp_videos/7d334c4a-5ec5-47d1-9af6-5134d7017e7c +2025-08-20 22:08:32 - INFO - [47246552-af71-4d3c-b034-bc4f74bcfaee] Received new video inference request. Prompt: '视频里发生了什么?', Video: 'sample_part_002.mp4' +2025-08-20 22:08:32 - INFO - [47246552-af71-4d3c-b034-bc4f74bcfaee] Video saved to temporary file: temp_videos/47246552-af71-4d3c-b034-bc4f74bcfaee.mp4 +2025-08-20 22:08:32 - INFO - [47246552-af71-4d3c-b034-bc4f74bcfaee] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:08:41 - INFO - [47246552-af71-4d3c-b034-bc4f74bcfaee] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 22:08:41 - INFO - [47246552-af71-4d3c-b034-bc4f74bcfaee] 30 frames saved to temp_videos/47246552-af71-4d3c-b034-bc4f74bcfaee +2025-08-20 22:08:41 - INFO - Prompt token length: 3585 +2025-08-20 22:08:58 - INFO - Tokens per second: 37.12922143875923, Peak GPU memory MB: 9378.375 +2025-08-20 22:08:58 - INFO - [47246552-af71-4d3c-b034-bc4f74bcfaee] Inference time: 26.39 seconds, CPU usage: 78.7%, CPU core utilization: [77.4, 73.7, 79.6, 83.9] +2025-08-20 22:08:58 - INFO - [47246552-af71-4d3c-b034-bc4f74bcfaee] Cleaned up temporary file: temp_videos/47246552-af71-4d3c-b034-bc4f74bcfaee.mp4 +2025-08-20 22:08:58 - INFO - [47246552-af71-4d3c-b034-bc4f74bcfaee] Cleaned up temporary frame directory: temp_videos/47246552-af71-4d3c-b034-bc4f74bcfaee +2025-08-20 22:08:59 - INFO - [cc126bf6-5c43-48b6-aa29-d38a216bc50a] Received new video inference request. Prompt: '视频里发生了什么?', Video: 'sample_part_002.mp4' +2025-08-20 22:08:59 - INFO - [cc126bf6-5c43-48b6-aa29-d38a216bc50a] Video saved to temporary file: temp_videos/cc126bf6-5c43-48b6-aa29-d38a216bc50a.mp4 +2025-08-20 22:08:59 - INFO - [cc126bf6-5c43-48b6-aa29-d38a216bc50a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:09:05 - INFO - [cc126bf6-5c43-48b6-aa29-d38a216bc50a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 22:09:05 - INFO - [cc126bf6-5c43-48b6-aa29-d38a216bc50a] 30 frames saved to temp_videos/cc126bf6-5c43-48b6-aa29-d38a216bc50a +2025-08-20 22:09:06 - INFO - Prompt token length: 3585 +2025-08-20 22:09:23 - INFO - Tokens per second: 42.08466552693894, Peak GPU memory MB: 9378.375 +2025-08-20 22:09:23 - INFO - [cc126bf6-5c43-48b6-aa29-d38a216bc50a] Inference time: 24.01 seconds, CPU usage: 79.0%, CPU core utilization: [77.0, 80.9, 81.3, 76.9] +2025-08-20 22:09:23 - INFO - [cc126bf6-5c43-48b6-aa29-d38a216bc50a] Cleaned up temporary file: temp_videos/cc126bf6-5c43-48b6-aa29-d38a216bc50a.mp4 +2025-08-20 22:09:23 - INFO - [cc126bf6-5c43-48b6-aa29-d38a216bc50a] Cleaned up temporary frame directory: temp_videos/cc126bf6-5c43-48b6-aa29-d38a216bc50a diff --git a/API_Transformers/logs/LFM2-VL-1.6B/20250820_220950.log b/API_Transformers/logs/LFM2-VL-1.6B/20250820_220950.log new file mode 100644 index 0000000000000000000000000000000000000000..025b1e9c172e0d1b33fa23b470195ba98aefb86b --- /dev/null +++ b/API_Transformers/logs/LFM2-VL-1.6B/20250820_220950.log @@ -0,0 +1,54 @@ +2025-08-20 22:09:50 - INFO - Loading model: LiquidAI/LFM2-VL-1.6B +2025-08-20 22:09:52 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-20 22:09:58 - INFO - Model loaded in 7.53 seconds +2025-08-20 22:09:58 - INFO - GPU Memory Usage after model load: 3023.64 MB +2025-08-20 22:10:07 - INFO - [afa49686-78ae-46eb-b2a4-8f75a70553bd] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_001.mp4' +2025-08-20 22:10:07 - INFO - [afa49686-78ae-46eb-b2a4-8f75a70553bd] Video saved to temporary file: temp_videos/afa49686-78ae-46eb-b2a4-8f75a70553bd.mp4 +2025-08-20 22:10:07 - INFO - [afa49686-78ae-46eb-b2a4-8f75a70553bd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:10:15 - INFO - [afa49686-78ae-46eb-b2a4-8f75a70553bd] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:10:15 - INFO - [afa49686-78ae-46eb-b2a4-8f75a70553bd] 30 frames saved to temp_videos/afa49686-78ae-46eb-b2a4-8f75a70553bd +2025-08-20 22:10:15 - INFO - Prompt token length: 3604 +2025-08-20 22:10:35 - INFO - Tokens per second: 41.32216588841384, Peak GPU memory MB: 9378.375 +2025-08-20 22:10:35 - INFO - [afa49686-78ae-46eb-b2a4-8f75a70553bd] Inference time: 28.69 seconds, CPU usage: 70.0%, CPU core utilization: [68.6, 67.0, 70.7, 73.7] +2025-08-20 22:10:35 - INFO - [afa49686-78ae-46eb-b2a4-8f75a70553bd] Cleaned up temporary file: temp_videos/afa49686-78ae-46eb-b2a4-8f75a70553bd.mp4 +2025-08-20 22:10:35 - INFO - [afa49686-78ae-46eb-b2a4-8f75a70553bd] Cleaned up temporary frame directory: temp_videos/afa49686-78ae-46eb-b2a4-8f75a70553bd +2025-08-20 22:10:35 - INFO - [3afaa144-dd25-428a-99bf-293426382d0b] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_001.mp4' +2025-08-20 22:10:36 - INFO - [3afaa144-dd25-428a-99bf-293426382d0b] Video saved to temporary file: temp_videos/3afaa144-dd25-428a-99bf-293426382d0b.mp4 +2025-08-20 22:10:36 - INFO - [3afaa144-dd25-428a-99bf-293426382d0b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:10:44 - INFO - [3afaa144-dd25-428a-99bf-293426382d0b] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:10:44 - INFO - [3afaa144-dd25-428a-99bf-293426382d0b] 30 frames saved to temp_videos/3afaa144-dd25-428a-99bf-293426382d0b +2025-08-20 22:10:44 - INFO - Prompt token length: 3604 +2025-08-20 22:11:04 - INFO - Tokens per second: 40.60897346605822, Peak GPU memory MB: 9378.375 +2025-08-20 22:11:04 - INFO - [3afaa144-dd25-428a-99bf-293426382d0b] Inference time: 28.89 seconds, CPU usage: 75.7%, CPU core utilization: [75.0, 65.5, 73.7, 88.6] +2025-08-20 22:11:04 - INFO - [3afaa144-dd25-428a-99bf-293426382d0b] Cleaned up temporary file: temp_videos/3afaa144-dd25-428a-99bf-293426382d0b.mp4 +2025-08-20 22:11:04 - INFO - [3afaa144-dd25-428a-99bf-293426382d0b] Cleaned up temporary frame directory: temp_videos/3afaa144-dd25-428a-99bf-293426382d0b +2025-08-20 22:11:05 - INFO - [5a1c46da-fc10-4080-a178-c2c775a7381d] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_002.mp4' +2025-08-20 22:11:05 - INFO - [5a1c46da-fc10-4080-a178-c2c775a7381d] Video saved to temporary file: temp_videos/5a1c46da-fc10-4080-a178-c2c775a7381d.mp4 +2025-08-20 22:11:05 - INFO - [5a1c46da-fc10-4080-a178-c2c775a7381d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:11:12 - INFO - [5a1c46da-fc10-4080-a178-c2c775a7381d] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:11:12 - INFO - [5a1c46da-fc10-4080-a178-c2c775a7381d] 30 frames saved to temp_videos/5a1c46da-fc10-4080-a178-c2c775a7381d +2025-08-20 22:11:13 - INFO - Prompt token length: 3604 +2025-08-20 22:11:35 - INFO - Tokens per second: 39.89175853584641, Peak GPU memory MB: 9378.375 +2025-08-20 22:11:35 - INFO - [5a1c46da-fc10-4080-a178-c2c775a7381d] Inference time: 30.34 seconds, CPU usage: 77.2%, CPU core utilization: [74.2, 76.3, 80.4, 77.9] +2025-08-20 22:11:35 - INFO - [5a1c46da-fc10-4080-a178-c2c775a7381d] Cleaned up temporary file: temp_videos/5a1c46da-fc10-4080-a178-c2c775a7381d.mp4 +2025-08-20 22:11:35 - INFO - [5a1c46da-fc10-4080-a178-c2c775a7381d] Cleaned up temporary frame directory: temp_videos/5a1c46da-fc10-4080-a178-c2c775a7381d +2025-08-20 22:11:35 - INFO - [edf5e6ee-7477-451f-8335-8dd2e4335d67] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_002.mp4' +2025-08-20 22:11:35 - INFO - [edf5e6ee-7477-451f-8335-8dd2e4335d67] Video saved to temporary file: temp_videos/edf5e6ee-7477-451f-8335-8dd2e4335d67.mp4 +2025-08-20 22:11:35 - INFO - [edf5e6ee-7477-451f-8335-8dd2e4335d67] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:11:42 - INFO - [edf5e6ee-7477-451f-8335-8dd2e4335d67] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:11:42 - INFO - [edf5e6ee-7477-451f-8335-8dd2e4335d67] 30 frames saved to temp_videos/edf5e6ee-7477-451f-8335-8dd2e4335d67 +2025-08-20 22:11:43 - INFO - Prompt token length: 3604 +2025-08-20 22:12:05 - INFO - Tokens per second: 39.00521947487598, Peak GPU memory MB: 9378.375 +2025-08-20 22:12:05 - INFO - [edf5e6ee-7477-451f-8335-8dd2e4335d67] Inference time: 30.32 seconds, CPU usage: 79.7%, CPU core utilization: [79.9, 67.1, 78.2, 93.3] +2025-08-20 22:12:05 - INFO - [edf5e6ee-7477-451f-8335-8dd2e4335d67] Cleaned up temporary file: temp_videos/edf5e6ee-7477-451f-8335-8dd2e4335d67.mp4 +2025-08-20 22:12:05 - INFO - [edf5e6ee-7477-451f-8335-8dd2e4335d67] Cleaned up temporary frame directory: temp_videos/edf5e6ee-7477-451f-8335-8dd2e4335d67 +2025-08-20 22:12:06 - INFO - [0fddc63e-dbc6-4c80-8242-088580de8927] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_003.mp4' +2025-08-20 22:12:06 - INFO - [0fddc63e-dbc6-4c80-8242-088580de8927] Video saved to temporary file: temp_videos/0fddc63e-dbc6-4c80-8242-088580de8927.mp4 +2025-08-20 22:12:06 - INFO - [0fddc63e-dbc6-4c80-8242-088580de8927] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:12:13 - INFO - [0fddc63e-dbc6-4c80-8242-088580de8927] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:12:13 - INFO - [0fddc63e-dbc6-4c80-8242-088580de8927] 30 frames saved to temp_videos/0fddc63e-dbc6-4c80-8242-088580de8927 +2025-08-20 22:12:13 - INFO - Prompt token length: 3604 +2025-08-20 22:12:30 - INFO - Tokens per second: 41.222647353168156, Peak GPU memory MB: 9378.375 +2025-08-20 22:12:30 - INFO - [0fddc63e-dbc6-4c80-8242-088580de8927] Inference time: 24.71 seconds, CPU usage: 79.2%, CPU core utilization: [77.9, 74.8, 80.1, 83.8] +2025-08-20 22:12:30 - INFO - [0fddc63e-dbc6-4c80-8242-088580de8927] Cleaned up temporary file: temp_videos/0fddc63e-dbc6-4c80-8242-088580de8927.mp4 +2025-08-20 22:12:30 - INFO - [0fddc63e-dbc6-4c80-8242-088580de8927] Cleaned up temporary frame directory: temp_videos/0fddc63e-dbc6-4c80-8242-088580de8927 diff --git a/API_Transformers/logs/LFM2-VL-1.6B/20250820_221918.log b/API_Transformers/logs/LFM2-VL-1.6B/20250820_221918.log new file mode 100644 index 0000000000000000000000000000000000000000..00e10b1ddecace69d9da33ee1d8cbdfd2c2ccac9 --- /dev/null +++ b/API_Transformers/logs/LFM2-VL-1.6B/20250820_221918.log @@ -0,0 +1,1174 @@ +2025-08-20 22:19:18 - INFO - Loading model: LiquidAI/LFM2-VL-1.6B +2025-08-20 22:19:19 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). 
+2025-08-20 22:19:25 - INFO - Model loaded in 7.27 seconds +2025-08-20 22:19:25 - INFO - GPU Memory Usage after model load: 3023.64 MB +2025-08-20 22:19:28 - INFO - [e94e2738-0fd3-4b88-82e1-08a69ca1c7a9] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_001.mp4' +2025-08-20 22:19:28 - INFO - [e94e2738-0fd3-4b88-82e1-08a69ca1c7a9] Video saved to temporary file: temp_videos/e94e2738-0fd3-4b88-82e1-08a69ca1c7a9.mp4 +2025-08-20 22:19:28 - INFO - [e94e2738-0fd3-4b88-82e1-08a69ca1c7a9] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:19:36 - INFO - [e94e2738-0fd3-4b88-82e1-08a69ca1c7a9] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:19:36 - INFO - [e94e2738-0fd3-4b88-82e1-08a69ca1c7a9] 30 frames saved to temp_videos/e94e2738-0fd3-4b88-82e1-08a69ca1c7a9 +2025-08-20 22:19:36 - INFO - Prompt token length: 3604 +2025-08-20 22:19:56 - INFO - Tokens per second: 39.176053401717525, Peak GPU memory MB: 9378.375 +2025-08-20 22:19:56 - INFO - [e94e2738-0fd3-4b88-82e1-08a69ca1c7a9] Inference time: 28.35 seconds, CPU usage: 71.5%, CPU core utilization: [68.7, 71.6, 75.1, 70.5] +2025-08-20 22:19:56 - INFO - [e94e2738-0fd3-4b88-82e1-08a69ca1c7a9] Cleaned up temporary file: temp_videos/e94e2738-0fd3-4b88-82e1-08a69ca1c7a9.mp4 +2025-08-20 22:19:56 - INFO - [e94e2738-0fd3-4b88-82e1-08a69ca1c7a9] Cleaned up temporary frame directory: temp_videos/e94e2738-0fd3-4b88-82e1-08a69ca1c7a9 +2025-08-20 22:19:56 - INFO - [8e164907-7f90-40e8-b374-12585636ef24] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_001.mp4' +2025-08-20 22:19:56 - INFO - [8e164907-7f90-40e8-b374-12585636ef24] Video saved to temporary file: temp_videos/8e164907-7f90-40e8-b374-12585636ef24.mp4 +2025-08-20 22:19:56 - INFO - [8e164907-7f90-40e8-b374-12585636ef24] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:20:03 - INFO - [8e164907-7f90-40e8-b374-12585636ef24] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:20:03 - INFO - [8e164907-7f90-40e8-b374-12585636ef24] 30 frames saved to temp_videos/8e164907-7f90-40e8-b374-12585636ef24 +2025-08-20 22:20:03 - INFO - Prompt token length: 3604 +2025-08-20 22:20:23 - INFO - Tokens per second: 39.56194680302735, Peak GPU memory MB: 9378.375 +2025-08-20 22:20:23 - INFO - [8e164907-7f90-40e8-b374-12585636ef24] Inference time: 27.08 seconds, CPU usage: 80.8%, CPU core utilization: [82.5, 86.9, 82.1, 71.6] +2025-08-20 22:20:23 - INFO - [8e164907-7f90-40e8-b374-12585636ef24] Cleaned up temporary file: temp_videos/8e164907-7f90-40e8-b374-12585636ef24.mp4 +2025-08-20 22:20:23 - INFO - [8e164907-7f90-40e8-b374-12585636ef24] Cleaned up temporary frame directory: temp_videos/8e164907-7f90-40e8-b374-12585636ef24 +2025-08-20 22:20:24 - INFO - [f700ec81-f123-4733-8310-d1f290508bae] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_002.mp4' +2025-08-20 22:20:24 - INFO - [f700ec81-f123-4733-8310-d1f290508bae] Video saved to temporary file: temp_videos/f700ec81-f123-4733-8310-d1f290508bae.mp4 +2025-08-20 22:20:24 - INFO - [f700ec81-f123-4733-8310-d1f290508bae] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:20:30 - INFO - [f700ec81-f123-4733-8310-d1f290508bae] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:20:30 - INFO - [f700ec81-f123-4733-8310-d1f290508bae] 30 frames saved to temp_videos/f700ec81-f123-4733-8310-d1f290508bae +2025-08-20 22:20:30 - INFO - Prompt token length: 3604 +2025-08-20 22:20:53 - INFO - Tokens per second: 39.591219952109306, Peak GPU memory MB: 9378.375 +2025-08-20 22:20:53 - INFO - [f700ec81-f123-4733-8310-d1f290508bae] Inference time: 29.22 seconds, CPU usage: 75.2%, CPU core utilization: [78.7, 74.7, 71.3, 75.9] +2025-08-20 22:20:53 - INFO - [f700ec81-f123-4733-8310-d1f290508bae] Cleaned up temporary file: temp_videos/f700ec81-f123-4733-8310-d1f290508bae.mp4 +2025-08-20 22:20:53 - INFO - [f700ec81-f123-4733-8310-d1f290508bae] Cleaned up temporary frame directory: temp_videos/f700ec81-f123-4733-8310-d1f290508bae +2025-08-20 22:20:53 - INFO - [2cb43a5e-af7f-47cd-a884-391a85924bc9] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_002.mp4' +2025-08-20 22:20:53 - INFO - [2cb43a5e-af7f-47cd-a884-391a85924bc9] Video saved to temporary file: temp_videos/2cb43a5e-af7f-47cd-a884-391a85924bc9.mp4 +2025-08-20 22:20:53 - INFO - [2cb43a5e-af7f-47cd-a884-391a85924bc9] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:21:00 - INFO - [2cb43a5e-af7f-47cd-a884-391a85924bc9] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:21:00 - INFO - [2cb43a5e-af7f-47cd-a884-391a85924bc9] 30 frames saved to temp_videos/2cb43a5e-af7f-47cd-a884-391a85924bc9 +2025-08-20 22:21:01 - INFO - Prompt token length: 3604 +2025-08-20 22:21:23 - INFO - Tokens per second: 39.47068338412706, Peak GPU memory MB: 9378.375 +2025-08-20 22:21:23 - INFO - [2cb43a5e-af7f-47cd-a884-391a85924bc9] Inference time: 30.13 seconds, CPU usage: 76.5%, CPU core utilization: [76.8, 64.9, 74.5, 89.6] +2025-08-20 22:21:23 - INFO - [2cb43a5e-af7f-47cd-a884-391a85924bc9] Cleaned up temporary file: temp_videos/2cb43a5e-af7f-47cd-a884-391a85924bc9.mp4 +2025-08-20 22:21:23 - INFO - [2cb43a5e-af7f-47cd-a884-391a85924bc9] Cleaned up temporary frame directory: temp_videos/2cb43a5e-af7f-47cd-a884-391a85924bc9 +2025-08-20 22:21:23 - INFO - [d3999990-f57b-4179-b38a-7afdf16172b5] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_003.mp4' +2025-08-20 22:21:23 - INFO - [d3999990-f57b-4179-b38a-7afdf16172b5] Video saved to temporary file: temp_videos/d3999990-f57b-4179-b38a-7afdf16172b5.mp4 +2025-08-20 22:21:23 - INFO - [d3999990-f57b-4179-b38a-7afdf16172b5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:21:31 - INFO - [d3999990-f57b-4179-b38a-7afdf16172b5] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:21:31 - INFO - [d3999990-f57b-4179-b38a-7afdf16172b5] 30 frames saved to temp_videos/d3999990-f57b-4179-b38a-7afdf16172b5 +2025-08-20 22:21:31 - INFO - Prompt token length: 3604 +2025-08-20 22:21:49 - INFO - Tokens per second: 37.345095593526885, Peak GPU memory MB: 9378.375 +2025-08-20 22:21:49 - INFO - [d3999990-f57b-4179-b38a-7afdf16172b5] Inference time: 25.28 seconds, CPU usage: 80.3%, CPU core utilization: [82.3, 71.7, 76.5, 90.4] +2025-08-20 22:21:49 - INFO - [d3999990-f57b-4179-b38a-7afdf16172b5] Cleaned up temporary file: temp_videos/d3999990-f57b-4179-b38a-7afdf16172b5.mp4 +2025-08-20 22:21:49 - INFO - [d3999990-f57b-4179-b38a-7afdf16172b5] Cleaned up temporary frame directory: temp_videos/d3999990-f57b-4179-b38a-7afdf16172b5 +2025-08-20 22:21:49 - INFO - [c09c7f90-52c6-4bf2-ac2c-418cc0338733] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_003.mp4' +2025-08-20 22:21:49 - INFO - [c09c7f90-52c6-4bf2-ac2c-418cc0338733] Video saved to temporary file: temp_videos/c09c7f90-52c6-4bf2-ac2c-418cc0338733.mp4 +2025-08-20 22:21:49 - INFO - [c09c7f90-52c6-4bf2-ac2c-418cc0338733] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:21:55 - INFO - [c09c7f90-52c6-4bf2-ac2c-418cc0338733] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:21:55 - INFO - [c09c7f90-52c6-4bf2-ac2c-418cc0338733] 30 frames saved to temp_videos/c09c7f90-52c6-4bf2-ac2c-418cc0338733 +2025-08-20 22:21:56 - INFO - Prompt token length: 3604 +2025-08-20 22:22:13 - INFO - Tokens per second: 40.50104433917785, Peak GPU memory MB: 9378.375 +2025-08-20 22:22:13 - INFO - [c09c7f90-52c6-4bf2-ac2c-418cc0338733] Inference time: 23.93 seconds, CPU usage: 79.5%, CPU core utilization: [79.5, 75.0, 79.3, 84.2] +2025-08-20 22:22:13 - INFO - [c09c7f90-52c6-4bf2-ac2c-418cc0338733] Cleaned up temporary file: temp_videos/c09c7f90-52c6-4bf2-ac2c-418cc0338733.mp4 +2025-08-20 22:22:13 - INFO - [c09c7f90-52c6-4bf2-ac2c-418cc0338733] Cleaned up temporary frame directory: temp_videos/c09c7f90-52c6-4bf2-ac2c-418cc0338733 +2025-08-20 22:22:13 - INFO - [824a9617-49cb-42c7-a162-4a10bdb02903] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_004.mp4' +2025-08-20 22:22:13 - INFO - [824a9617-49cb-42c7-a162-4a10bdb02903] Video saved to temporary file: temp_videos/824a9617-49cb-42c7-a162-4a10bdb02903.mp4 +2025-08-20 22:22:13 - INFO - [824a9617-49cb-42c7-a162-4a10bdb02903] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:22:21 - INFO - [824a9617-49cb-42c7-a162-4a10bdb02903] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:22:21 - INFO - [824a9617-49cb-42c7-a162-4a10bdb02903] 30 frames saved to temp_videos/824a9617-49cb-42c7-a162-4a10bdb02903 +2025-08-20 22:22:21 - INFO - Prompt token length: 3604 +2025-08-20 22:22:39 - INFO - Tokens per second: 37.87612857083515, Peak GPU memory MB: 9378.375 +2025-08-20 22:22:39 - INFO - [824a9617-49cb-42c7-a162-4a10bdb02903] Inference time: 25.99 seconds, CPU usage: 79.5%, CPU core utilization: [75.3, 81.1, 84.0, 77.5] +2025-08-20 22:22:39 - INFO - [824a9617-49cb-42c7-a162-4a10bdb02903] Cleaned up temporary file: temp_videos/824a9617-49cb-42c7-a162-4a10bdb02903.mp4 +2025-08-20 22:22:39 - INFO - [824a9617-49cb-42c7-a162-4a10bdb02903] Cleaned up temporary frame directory: temp_videos/824a9617-49cb-42c7-a162-4a10bdb02903 +2025-08-20 22:22:39 - INFO - [a7b763d8-0520-4d3c-999b-55b993864802] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_004.mp4' +2025-08-20 22:22:39 - INFO - [a7b763d8-0520-4d3c-999b-55b993864802] Video saved to temporary file: temp_videos/a7b763d8-0520-4d3c-999b-55b993864802.mp4 +2025-08-20 22:22:39 - INFO - [a7b763d8-0520-4d3c-999b-55b993864802] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:22:45 - INFO - [a7b763d8-0520-4d3c-999b-55b993864802] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:22:46 - INFO - [a7b763d8-0520-4d3c-999b-55b993864802] 30 frames saved to temp_videos/a7b763d8-0520-4d3c-999b-55b993864802 +2025-08-20 22:22:46 - INFO - Prompt token length: 3604 +2025-08-20 22:23:04 - INFO - Tokens per second: 40.64016822608698, Peak GPU memory MB: 9378.375 +2025-08-20 22:23:04 - INFO - [a7b763d8-0520-4d3c-999b-55b993864802] Inference time: 24.65 seconds, CPU usage: 77.6%, CPU core utilization: [85.7, 82.1, 72.3, 70.5] +2025-08-20 22:23:04 - INFO - [a7b763d8-0520-4d3c-999b-55b993864802] Cleaned up temporary file: temp_videos/a7b763d8-0520-4d3c-999b-55b993864802.mp4 +2025-08-20 22:23:04 - INFO - [a7b763d8-0520-4d3c-999b-55b993864802] Cleaned up temporary frame directory: temp_videos/a7b763d8-0520-4d3c-999b-55b993864802 +2025-08-20 22:23:04 - INFO - [8e59eb45-94db-4110-b8d9-7b5e9002a61d] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_005.mp4' +2025-08-20 22:23:04 - INFO - [8e59eb45-94db-4110-b8d9-7b5e9002a61d] Video saved to temporary file: temp_videos/8e59eb45-94db-4110-b8d9-7b5e9002a61d.mp4 +2025-08-20 22:23:04 - INFO - [8e59eb45-94db-4110-b8d9-7b5e9002a61d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:23:12 - INFO - [8e59eb45-94db-4110-b8d9-7b5e9002a61d] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:23:12 - INFO - [8e59eb45-94db-4110-b8d9-7b5e9002a61d] 30 frames saved to temp_videos/8e59eb45-94db-4110-b8d9-7b5e9002a61d +2025-08-20 22:23:12 - INFO - Prompt token length: 3604 +2025-08-20 22:23:32 - INFO - Tokens per second: 38.38521565523533, Peak GPU memory MB: 9378.375 +2025-08-20 22:23:32 - INFO - [8e59eb45-94db-4110-b8d9-7b5e9002a61d] Inference time: 28.44 seconds, CPU usage: 76.7%, CPU core utilization: [79.8, 68.9, 72.7, 85.3] +2025-08-20 22:23:32 - INFO - [8e59eb45-94db-4110-b8d9-7b5e9002a61d] Cleaned up temporary file: temp_videos/8e59eb45-94db-4110-b8d9-7b5e9002a61d.mp4 +2025-08-20 22:23:32 - INFO - [8e59eb45-94db-4110-b8d9-7b5e9002a61d] Cleaned up temporary frame directory: temp_videos/8e59eb45-94db-4110-b8d9-7b5e9002a61d +2025-08-20 22:23:33 - INFO - [5b8d1d2a-a615-4ba4-b741-4c36881bfacc] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_005.mp4' +2025-08-20 22:23:33 - INFO - [5b8d1d2a-a615-4ba4-b741-4c36881bfacc] Video saved to temporary file: temp_videos/5b8d1d2a-a615-4ba4-b741-4c36881bfacc.mp4 +2025-08-20 22:23:33 - INFO - [5b8d1d2a-a615-4ba4-b741-4c36881bfacc] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:23:40 - INFO - [5b8d1d2a-a615-4ba4-b741-4c36881bfacc] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:23:40 - INFO - [5b8d1d2a-a615-4ba4-b741-4c36881bfacc] 30 frames saved to temp_videos/5b8d1d2a-a615-4ba4-b741-4c36881bfacc +2025-08-20 22:23:41 - INFO - Prompt token length: 3604 +2025-08-20 22:24:01 - INFO - Tokens per second: 38.57113528231376, Peak GPU memory MB: 9378.375 +2025-08-20 22:24:01 - INFO - [5b8d1d2a-a615-4ba4-b741-4c36881bfacc] Inference time: 28.64 seconds, CPU usage: 77.1%, CPU core utilization: [73.2, 79.6, 81.1, 74.7] +2025-08-20 22:24:01 - INFO - [5b8d1d2a-a615-4ba4-b741-4c36881bfacc] Cleaned up temporary file: temp_videos/5b8d1d2a-a615-4ba4-b741-4c36881bfacc.mp4 +2025-08-20 22:24:01 - INFO - [5b8d1d2a-a615-4ba4-b741-4c36881bfacc] Cleaned up temporary frame directory: temp_videos/5b8d1d2a-a615-4ba4-b741-4c36881bfacc +2025-08-20 22:24:01 - INFO - [c0077e00-2982-41ee-aace-96617d8c8de5] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_006.mp4' +2025-08-20 22:24:02 - INFO - [c0077e00-2982-41ee-aace-96617d8c8de5] Video saved to temporary file: temp_videos/c0077e00-2982-41ee-aace-96617d8c8de5.mp4 +2025-08-20 22:24:02 - INFO - [c0077e00-2982-41ee-aace-96617d8c8de5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:24:08 - INFO - [c0077e00-2982-41ee-aace-96617d8c8de5] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:24:08 - INFO - [c0077e00-2982-41ee-aace-96617d8c8de5] 30 frames saved to temp_videos/c0077e00-2982-41ee-aace-96617d8c8de5 +2025-08-20 22:24:08 - INFO - Prompt token length: 3604 +2025-08-20 22:24:28 - INFO - Tokens per second: 40.54745884718579, Peak GPU memory MB: 9378.375 +2025-08-20 22:24:28 - INFO - [c0077e00-2982-41ee-aace-96617d8c8de5] Inference time: 26.51 seconds, CPU usage: 77.3%, CPU core utilization: [79.2, 76.4, 74.3, 79.2] +2025-08-20 22:24:28 - INFO - [c0077e00-2982-41ee-aace-96617d8c8de5] Cleaned up temporary file: temp_videos/c0077e00-2982-41ee-aace-96617d8c8de5.mp4 +2025-08-20 22:24:28 - INFO - [c0077e00-2982-41ee-aace-96617d8c8de5] Cleaned up temporary frame directory: temp_videos/c0077e00-2982-41ee-aace-96617d8c8de5 +2025-08-20 22:24:28 - INFO - [35d87ed3-6cef-4a44-a713-9d53bbcfba55] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_006.mp4' +2025-08-20 22:24:28 - INFO - [35d87ed3-6cef-4a44-a713-9d53bbcfba55] Video saved to temporary file: temp_videos/35d87ed3-6cef-4a44-a713-9d53bbcfba55.mp4 +2025-08-20 22:24:28 - INFO - [35d87ed3-6cef-4a44-a713-9d53bbcfba55] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:24:36 - INFO - [35d87ed3-6cef-4a44-a713-9d53bbcfba55] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:24:36 - INFO - [35d87ed3-6cef-4a44-a713-9d53bbcfba55] 30 frames saved to temp_videos/35d87ed3-6cef-4a44-a713-9d53bbcfba55 +2025-08-20 22:24:36 - INFO - Prompt token length: 3604 +2025-08-20 22:24:56 - INFO - Tokens per second: 40.02737695090751, Peak GPU memory MB: 9378.375 +2025-08-20 22:24:56 - INFO - [35d87ed3-6cef-4a44-a713-9d53bbcfba55] Inference time: 27.57 seconds, CPU usage: 79.0%, CPU core utilization: [80.2, 69.7, 76.0, 90.1] +2025-08-20 22:24:56 - INFO - [35d87ed3-6cef-4a44-a713-9d53bbcfba55] Cleaned up temporary file: temp_videos/35d87ed3-6cef-4a44-a713-9d53bbcfba55.mp4 +2025-08-20 22:24:56 - INFO - [35d87ed3-6cef-4a44-a713-9d53bbcfba55] Cleaned up temporary frame directory: temp_videos/35d87ed3-6cef-4a44-a713-9d53bbcfba55 +2025-08-20 22:24:56 - INFO - [119f28d6-447d-47e4-8e70-a0a792e9cda4] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_007.mp4' +2025-08-20 22:24:56 - INFO - [119f28d6-447d-47e4-8e70-a0a792e9cda4] Video saved to temporary file: temp_videos/119f28d6-447d-47e4-8e70-a0a792e9cda4.mp4 +2025-08-20 22:24:56 - INFO - [119f28d6-447d-47e4-8e70-a0a792e9cda4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:25:03 - INFO - [119f28d6-447d-47e4-8e70-a0a792e9cda4] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:25:03 - INFO - [119f28d6-447d-47e4-8e70-a0a792e9cda4] 30 frames saved to temp_videos/119f28d6-447d-47e4-8e70-a0a792e9cda4 +2025-08-20 22:25:04 - INFO - Prompt token length: 3604 +2025-08-20 22:25:20 - INFO - Tokens per second: 41.297771497632056, Peak GPU memory MB: 9378.375 +2025-08-20 22:25:20 - INFO - [119f28d6-447d-47e4-8e70-a0a792e9cda4] Inference time: 24.52 seconds, CPU usage: 74.6%, CPU core utilization: [75.6, 72.6, 73.9, 76.5] +2025-08-20 22:25:20 - INFO - [119f28d6-447d-47e4-8e70-a0a792e9cda4] Cleaned up temporary file: temp_videos/119f28d6-447d-47e4-8e70-a0a792e9cda4.mp4 +2025-08-20 22:25:20 - INFO - [119f28d6-447d-47e4-8e70-a0a792e9cda4] Cleaned up temporary frame directory: temp_videos/119f28d6-447d-47e4-8e70-a0a792e9cda4 +2025-08-20 22:25:21 - INFO - [978c75bb-6eef-41fe-87b2-77b10f5f478b] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_007.mp4' +2025-08-20 22:25:21 - INFO - [978c75bb-6eef-41fe-87b2-77b10f5f478b] Video saved to temporary file: temp_videos/978c75bb-6eef-41fe-87b2-77b10f5f478b.mp4 +2025-08-20 22:25:21 - INFO - [978c75bb-6eef-41fe-87b2-77b10f5f478b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:25:29 - INFO - [978c75bb-6eef-41fe-87b2-77b10f5f478b] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:25:29 - INFO - [978c75bb-6eef-41fe-87b2-77b10f5f478b] 30 frames saved to temp_videos/978c75bb-6eef-41fe-87b2-77b10f5f478b +2025-08-20 22:25:30 - INFO - Prompt token length: 3604 +2025-08-20 22:25:47 - INFO - Tokens per second: 40.57407084953663, Peak GPU memory MB: 9378.375 +2025-08-20 22:25:47 - INFO - [978c75bb-6eef-41fe-87b2-77b10f5f478b] Inference time: 25.99 seconds, CPU usage: 79.6%, CPU core utilization: [78.2, 63.9, 78.7, 97.4] +2025-08-20 22:25:47 - INFO - [978c75bb-6eef-41fe-87b2-77b10f5f478b] Cleaned up temporary file: temp_videos/978c75bb-6eef-41fe-87b2-77b10f5f478b.mp4 +2025-08-20 22:25:47 - INFO - [978c75bb-6eef-41fe-87b2-77b10f5f478b] Cleaned up temporary frame directory: temp_videos/978c75bb-6eef-41fe-87b2-77b10f5f478b +2025-08-20 22:25:47 - INFO - [3cbbc13e-21b5-407c-a202-85e09a8c2ddc] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_008.mp4' +2025-08-20 22:25:47 - INFO - [3cbbc13e-21b5-407c-a202-85e09a8c2ddc] Video saved to temporary file: temp_videos/3cbbc13e-21b5-407c-a202-85e09a8c2ddc.mp4 +2025-08-20 22:25:47 - INFO - [3cbbc13e-21b5-407c-a202-85e09a8c2ddc] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:25:54 - INFO - [3cbbc13e-21b5-407c-a202-85e09a8c2ddc] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:25:54 - INFO - [3cbbc13e-21b5-407c-a202-85e09a8c2ddc] 30 frames saved to temp_videos/3cbbc13e-21b5-407c-a202-85e09a8c2ddc +2025-08-20 22:25:54 - INFO - Prompt token length: 3604 +2025-08-20 22:26:14 - INFO - Tokens per second: 37.33059104449174, Peak GPU memory MB: 9378.375 +2025-08-20 22:26:14 - INFO - [3cbbc13e-21b5-407c-a202-85e09a8c2ddc] Inference time: 26.96 seconds, CPU usage: 77.7%, CPU core utilization: [78.5, 62.8, 76.3, 93.3] +2025-08-20 22:26:14 - INFO - [3cbbc13e-21b5-407c-a202-85e09a8c2ddc] Cleaned up temporary file: temp_videos/3cbbc13e-21b5-407c-a202-85e09a8c2ddc.mp4 +2025-08-20 22:26:14 - INFO - [3cbbc13e-21b5-407c-a202-85e09a8c2ddc] Cleaned up temporary frame directory: temp_videos/3cbbc13e-21b5-407c-a202-85e09a8c2ddc +2025-08-20 22:26:14 - INFO - [47d0b154-958f-4c13-bc1c-1ee74c146ad3] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_008.mp4' +2025-08-20 22:26:14 - INFO - [47d0b154-958f-4c13-bc1c-1ee74c146ad3] Video saved to temporary file: temp_videos/47d0b154-958f-4c13-bc1c-1ee74c146ad3.mp4 +2025-08-20 22:26:14 - INFO - [47d0b154-958f-4c13-bc1c-1ee74c146ad3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:26:20 - INFO - [47d0b154-958f-4c13-bc1c-1ee74c146ad3] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:26:20 - INFO - [47d0b154-958f-4c13-bc1c-1ee74c146ad3] 30 frames saved to temp_videos/47d0b154-958f-4c13-bc1c-1ee74c146ad3 +2025-08-20 22:26:21 - INFO - Prompt token length: 3604 +2025-08-20 22:26:40 - INFO - Tokens per second: 40.97108679897668, Peak GPU memory MB: 9378.375 +2025-08-20 22:26:40 - INFO - [47d0b154-958f-4c13-bc1c-1ee74c146ad3] Inference time: 26.01 seconds, CPU usage: 77.4%, CPU core utilization: [79.7, 59.8, 73.6, 96.6] +2025-08-20 22:26:40 - INFO - [47d0b154-958f-4c13-bc1c-1ee74c146ad3] Cleaned up temporary file: temp_videos/47d0b154-958f-4c13-bc1c-1ee74c146ad3.mp4 +2025-08-20 22:26:40 - INFO - [47d0b154-958f-4c13-bc1c-1ee74c146ad3] Cleaned up temporary frame directory: temp_videos/47d0b154-958f-4c13-bc1c-1ee74c146ad3 +2025-08-20 22:26:40 - INFO - [6de44e06-0d01-49ef-bc85-1b7d65affd7e] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_009.mp4' +2025-08-20 22:26:40 - INFO - [6de44e06-0d01-49ef-bc85-1b7d65affd7e] Video saved to temporary file: temp_videos/6de44e06-0d01-49ef-bc85-1b7d65affd7e.mp4 +2025-08-20 22:26:40 - INFO - [6de44e06-0d01-49ef-bc85-1b7d65affd7e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:26:49 - INFO - [6de44e06-0d01-49ef-bc85-1b7d65affd7e] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:26:49 - INFO - [6de44e06-0d01-49ef-bc85-1b7d65affd7e] 30 frames saved to temp_videos/6de44e06-0d01-49ef-bc85-1b7d65affd7e +2025-08-20 22:26:49 - INFO - Prompt token length: 3604 +2025-08-20 22:27:09 - INFO - Tokens per second: 39.948981204658494, Peak GPU memory MB: 9378.375 +2025-08-20 22:27:09 - INFO - [6de44e06-0d01-49ef-bc85-1b7d65affd7e] Inference time: 29.25 seconds, CPU usage: 78.7%, CPU core utilization: [78.1, 74.6, 76.1, 85.9] +2025-08-20 22:27:09 - INFO - [6de44e06-0d01-49ef-bc85-1b7d65affd7e] Cleaned up temporary file: temp_videos/6de44e06-0d01-49ef-bc85-1b7d65affd7e.mp4 +2025-08-20 22:27:09 - INFO - [6de44e06-0d01-49ef-bc85-1b7d65affd7e] Cleaned up temporary frame directory: temp_videos/6de44e06-0d01-49ef-bc85-1b7d65affd7e +2025-08-20 22:27:10 - INFO - [20773204-9773-4845-babb-02b5c94f87a0] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_009.mp4' +2025-08-20 22:27:10 - INFO - [20773204-9773-4845-babb-02b5c94f87a0] Video saved to temporary file: temp_videos/20773204-9773-4845-babb-02b5c94f87a0.mp4 +2025-08-20 22:27:10 - INFO - [20773204-9773-4845-babb-02b5c94f87a0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:27:17 - INFO - [20773204-9773-4845-babb-02b5c94f87a0] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:27:17 - INFO - [20773204-9773-4845-babb-02b5c94f87a0] 30 frames saved to temp_videos/20773204-9773-4845-babb-02b5c94f87a0 +2025-08-20 22:27:18 - INFO - Prompt token length: 3604 +2025-08-20 22:27:38 - INFO - Tokens per second: 39.48549649221776, Peak GPU memory MB: 9378.375 +2025-08-20 22:27:38 - INFO - [20773204-9773-4845-babb-02b5c94f87a0] Inference time: 28.26 seconds, CPU usage: 77.1%, CPU core utilization: [83.8, 69.4, 66.9, 88.1] +2025-08-20 22:27:38 - INFO - [20773204-9773-4845-babb-02b5c94f87a0] Cleaned up temporary file: temp_videos/20773204-9773-4845-babb-02b5c94f87a0.mp4 +2025-08-20 22:27:38 - INFO - [20773204-9773-4845-babb-02b5c94f87a0] Cleaned up temporary frame directory: temp_videos/20773204-9773-4845-babb-02b5c94f87a0 +2025-08-20 22:27:38 - INFO - [aa2a66a2-808c-4e5c-b423-99e23a45f381] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_010.mp4' +2025-08-20 22:27:38 - INFO - [aa2a66a2-808c-4e5c-b423-99e23a45f381] Video saved to temporary file: temp_videos/aa2a66a2-808c-4e5c-b423-99e23a45f381.mp4 +2025-08-20 22:27:38 - INFO - [aa2a66a2-808c-4e5c-b423-99e23a45f381] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:27:46 - INFO - [aa2a66a2-808c-4e5c-b423-99e23a45f381] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:27:46 - INFO - [aa2a66a2-808c-4e5c-b423-99e23a45f381] 30 frames saved to temp_videos/aa2a66a2-808c-4e5c-b423-99e23a45f381 +2025-08-20 22:27:47 - INFO - Prompt token length: 3604 +2025-08-20 22:28:06 - INFO - Tokens per second: 40.950792314228124, Peak GPU memory MB: 9378.375 +2025-08-20 22:28:06 - INFO - [aa2a66a2-808c-4e5c-b423-99e23a45f381] Inference time: 28.13 seconds, CPU usage: 76.7%, CPU core utilization: [65.8, 73.5, 90.0, 77.5] +2025-08-20 22:28:06 - INFO - [aa2a66a2-808c-4e5c-b423-99e23a45f381] Cleaned up temporary file: temp_videos/aa2a66a2-808c-4e5c-b423-99e23a45f381.mp4 +2025-08-20 22:28:06 - INFO - [aa2a66a2-808c-4e5c-b423-99e23a45f381] Cleaned up temporary frame directory: temp_videos/aa2a66a2-808c-4e5c-b423-99e23a45f381 +2025-08-20 22:28:06 - INFO - [722f84b8-f835-49ec-b9cf-7abc704404f5] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_010.mp4' +2025-08-20 22:28:07 - INFO - [722f84b8-f835-49ec-b9cf-7abc704404f5] Video saved to temporary file: temp_videos/722f84b8-f835-49ec-b9cf-7abc704404f5.mp4 +2025-08-20 22:28:07 - INFO - [722f84b8-f835-49ec-b9cf-7abc704404f5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:28:14 - INFO - [722f84b8-f835-49ec-b9cf-7abc704404f5] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:28:14 - INFO - [722f84b8-f835-49ec-b9cf-7abc704404f5] 30 frames saved to temp_videos/722f84b8-f835-49ec-b9cf-7abc704404f5 +2025-08-20 22:28:15 - INFO - Prompt token length: 3604 +2025-08-20 22:28:35 - INFO - Tokens per second: 40.48125592448077, Peak GPU memory MB: 9378.375 +2025-08-20 22:28:35 - INFO - [722f84b8-f835-49ec-b9cf-7abc704404f5] Inference time: 28.07 seconds, CPU usage: 76.5%, CPU core utilization: [77.9, 58.7, 76.8, 92.3] +2025-08-20 22:28:35 - INFO - [722f84b8-f835-49ec-b9cf-7abc704404f5] Cleaned up temporary file: temp_videos/722f84b8-f835-49ec-b9cf-7abc704404f5.mp4 +2025-08-20 22:28:35 - INFO - [722f84b8-f835-49ec-b9cf-7abc704404f5] Cleaned up temporary frame directory: temp_videos/722f84b8-f835-49ec-b9cf-7abc704404f5 +2025-08-20 22:28:35 - INFO - [56c1fc8e-dbb1-431b-8990-35a4a3403517] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_011.mp4' +2025-08-20 22:28:35 - INFO - [56c1fc8e-dbb1-431b-8990-35a4a3403517] Video saved to temporary file: temp_videos/56c1fc8e-dbb1-431b-8990-35a4a3403517.mp4 +2025-08-20 22:28:35 - INFO - [56c1fc8e-dbb1-431b-8990-35a4a3403517] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:28:43 - INFO - [56c1fc8e-dbb1-431b-8990-35a4a3403517] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:28:43 - INFO - [56c1fc8e-dbb1-431b-8990-35a4a3403517] 30 frames saved to temp_videos/56c1fc8e-dbb1-431b-8990-35a4a3403517 +2025-08-20 22:28:43 - INFO - Prompt token length: 3604 +2025-08-20 22:29:04 - INFO - Tokens per second: 38.97002996849644, Peak GPU memory MB: 9378.375 +2025-08-20 22:29:04 - INFO - [56c1fc8e-dbb1-431b-8990-35a4a3403517] Inference time: 28.80 seconds, CPU usage: 77.8%, CPU core utilization: [82.0, 74.7, 73.1, 81.4] +2025-08-20 22:29:04 - INFO - [56c1fc8e-dbb1-431b-8990-35a4a3403517] Cleaned up temporary file: temp_videos/56c1fc8e-dbb1-431b-8990-35a4a3403517.mp4 +2025-08-20 22:29:04 - INFO - [56c1fc8e-dbb1-431b-8990-35a4a3403517] Cleaned up temporary frame directory: temp_videos/56c1fc8e-dbb1-431b-8990-35a4a3403517 +2025-08-20 22:29:04 - INFO - [ce1a27bd-6bcf-44bb-940e-83518f14957b] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_011.mp4' +2025-08-20 22:29:04 - INFO - [ce1a27bd-6bcf-44bb-940e-83518f14957b] Video saved to temporary file: temp_videos/ce1a27bd-6bcf-44bb-940e-83518f14957b.mp4 +2025-08-20 22:29:04 - INFO - [ce1a27bd-6bcf-44bb-940e-83518f14957b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:29:10 - INFO - [ce1a27bd-6bcf-44bb-940e-83518f14957b] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:29:10 - INFO - [ce1a27bd-6bcf-44bb-940e-83518f14957b] 30 frames saved to temp_videos/ce1a27bd-6bcf-44bb-940e-83518f14957b +2025-08-20 22:29:11 - INFO - Prompt token length: 3604 +2025-08-20 22:29:31 - INFO - Tokens per second: 38.9287779528858, Peak GPU memory MB: 9378.375 +2025-08-20 22:29:31 - INFO - [ce1a27bd-6bcf-44bb-940e-83518f14957b] Inference time: 27.48 seconds, CPU usage: 81.7%, CPU core utilization: [80.4, 82.5, 84.0, 80.1] +2025-08-20 22:29:31 - INFO - [ce1a27bd-6bcf-44bb-940e-83518f14957b] Cleaned up temporary file: temp_videos/ce1a27bd-6bcf-44bb-940e-83518f14957b.mp4 +2025-08-20 22:29:31 - INFO - [ce1a27bd-6bcf-44bb-940e-83518f14957b] Cleaned up temporary frame directory: temp_videos/ce1a27bd-6bcf-44bb-940e-83518f14957b +2025-08-20 22:29:31 - INFO - [0f99c8fb-ff01-4a6a-82c6-ea7cbfb3b9cd] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_012.mp4' +2025-08-20 22:29:32 - INFO - [0f99c8fb-ff01-4a6a-82c6-ea7cbfb3b9cd] Video saved to temporary file: temp_videos/0f99c8fb-ff01-4a6a-82c6-ea7cbfb3b9cd.mp4 +2025-08-20 22:29:32 - INFO - [0f99c8fb-ff01-4a6a-82c6-ea7cbfb3b9cd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:29:39 - INFO - [0f99c8fb-ff01-4a6a-82c6-ea7cbfb3b9cd] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:29:39 - INFO - [0f99c8fb-ff01-4a6a-82c6-ea7cbfb3b9cd] 30 frames saved to temp_videos/0f99c8fb-ff01-4a6a-82c6-ea7cbfb3b9cd +2025-08-20 22:29:39 - INFO - Prompt token length: 3604 +2025-08-20 22:30:00 - INFO - Tokens per second: 40.38346039864145, Peak GPU memory MB: 9378.375 +2025-08-20 22:30:00 - INFO - [0f99c8fb-ff01-4a6a-82c6-ea7cbfb3b9cd] Inference time: 28.03 seconds, CPU usage: 75.2%, CPU core utilization: [80.4, 65.1, 68.7, 86.3] +2025-08-20 22:30:00 - INFO - [0f99c8fb-ff01-4a6a-82c6-ea7cbfb3b9cd] Cleaned up temporary file: temp_videos/0f99c8fb-ff01-4a6a-82c6-ea7cbfb3b9cd.mp4 +2025-08-20 22:30:00 - INFO - [0f99c8fb-ff01-4a6a-82c6-ea7cbfb3b9cd] Cleaned up temporary frame directory: temp_videos/0f99c8fb-ff01-4a6a-82c6-ea7cbfb3b9cd +2025-08-20 22:30:00 - INFO - [49359fa0-f5ef-4368-af74-8cdd0bab6c37] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_012.mp4' +2025-08-20 22:30:00 - INFO - [49359fa0-f5ef-4368-af74-8cdd0bab6c37] Video saved to temporary file: temp_videos/49359fa0-f5ef-4368-af74-8cdd0bab6c37.mp4 +2025-08-20 22:30:00 - INFO - [49359fa0-f5ef-4368-af74-8cdd0bab6c37] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:30:06 - INFO - [49359fa0-f5ef-4368-af74-8cdd0bab6c37] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:30:06 - INFO - [49359fa0-f5ef-4368-af74-8cdd0bab6c37] 30 frames saved to temp_videos/49359fa0-f5ef-4368-af74-8cdd0bab6c37 +2025-08-20 22:30:07 - INFO - Prompt token length: 3604 +2025-08-20 22:30:27 - INFO - Tokens per second: 39.63977281870722, Peak GPU memory MB: 9378.375 +2025-08-20 22:30:27 - INFO - [49359fa0-f5ef-4368-af74-8cdd0bab6c37] Inference time: 27.23 seconds, CPU usage: 76.7%, CPU core utilization: [91.7, 76.0, 62.5, 76.4] +2025-08-20 22:30:27 - INFO - [49359fa0-f5ef-4368-af74-8cdd0bab6c37] Cleaned up temporary file: temp_videos/49359fa0-f5ef-4368-af74-8cdd0bab6c37.mp4 +2025-08-20 22:30:27 - INFO - [49359fa0-f5ef-4368-af74-8cdd0bab6c37] Cleaned up temporary frame directory: temp_videos/49359fa0-f5ef-4368-af74-8cdd0bab6c37 +2025-08-20 22:30:27 - INFO - [2026d740-4f10-41fe-b77b-892d16850275] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_013.mp4' +2025-08-20 22:30:27 - INFO - [2026d740-4f10-41fe-b77b-892d16850275] Video saved to temporary file: temp_videos/2026d740-4f10-41fe-b77b-892d16850275.mp4 +2025-08-20 22:30:27 - INFO - [2026d740-4f10-41fe-b77b-892d16850275] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:30:34 - INFO - [2026d740-4f10-41fe-b77b-892d16850275] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:30:34 - INFO - [2026d740-4f10-41fe-b77b-892d16850275] 30 frames saved to temp_videos/2026d740-4f10-41fe-b77b-892d16850275 +2025-08-20 22:30:35 - INFO - Prompt token length: 3604 +2025-08-20 22:30:54 - INFO - Tokens per second: 39.012777556196205, Peak GPU memory MB: 9378.375 +2025-08-20 22:30:54 - INFO - [2026d740-4f10-41fe-b77b-892d16850275] Inference time: 26.88 seconds, CPU usage: 78.8%, CPU core utilization: [83.9, 82.9, 73.8, 74.7] +2025-08-20 22:30:54 - INFO - [2026d740-4f10-41fe-b77b-892d16850275] Cleaned up temporary file: temp_videos/2026d740-4f10-41fe-b77b-892d16850275.mp4 +2025-08-20 22:30:54 - INFO - [2026d740-4f10-41fe-b77b-892d16850275] Cleaned up temporary frame directory: temp_videos/2026d740-4f10-41fe-b77b-892d16850275 +2025-08-20 22:30:54 - INFO - [279b6d04-e588-4d24-8792-aa7ca95570db] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_013.mp4' +2025-08-20 22:30:54 - INFO - [279b6d04-e588-4d24-8792-aa7ca95570db] Video saved to temporary file: temp_videos/279b6d04-e588-4d24-8792-aa7ca95570db.mp4 +2025-08-20 22:30:54 - INFO - [279b6d04-e588-4d24-8792-aa7ca95570db] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:31:02 - INFO - [279b6d04-e588-4d24-8792-aa7ca95570db] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:31:02 - INFO - [279b6d04-e588-4d24-8792-aa7ca95570db] 30 frames saved to temp_videos/279b6d04-e588-4d24-8792-aa7ca95570db +2025-08-20 22:31:02 - INFO - Prompt token length: 3604 +2025-08-20 22:31:21 - INFO - Tokens per second: 38.80665454390215, Peak GPU memory MB: 9378.375 +2025-08-20 22:31:21 - INFO - [279b6d04-e588-4d24-8792-aa7ca95570db] Inference time: 27.22 seconds, CPU usage: 81.7%, CPU core utilization: [82.7, 73.3, 79.5, 91.2] +2025-08-20 22:31:21 - INFO - [279b6d04-e588-4d24-8792-aa7ca95570db] Cleaned up temporary file: temp_videos/279b6d04-e588-4d24-8792-aa7ca95570db.mp4 +2025-08-20 22:31:21 - INFO - [279b6d04-e588-4d24-8792-aa7ca95570db] Cleaned up temporary frame directory: temp_videos/279b6d04-e588-4d24-8792-aa7ca95570db +2025-08-20 22:31:22 - INFO - [54f8cc9f-9a1a-4295-a7eb-8072b859e4cc] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_014.mp4' +2025-08-20 22:31:22 - INFO - [54f8cc9f-9a1a-4295-a7eb-8072b859e4cc] Video saved to temporary file: temp_videos/54f8cc9f-9a1a-4295-a7eb-8072b859e4cc.mp4 +2025-08-20 22:31:22 - INFO - [54f8cc9f-9a1a-4295-a7eb-8072b859e4cc] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:31:28 - INFO - [54f8cc9f-9a1a-4295-a7eb-8072b859e4cc] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:31:28 - INFO - [54f8cc9f-9a1a-4295-a7eb-8072b859e4cc] 30 frames saved to temp_videos/54f8cc9f-9a1a-4295-a7eb-8072b859e4cc +2025-08-20 22:31:28 - INFO - Prompt token length: 3604 +2025-08-20 22:31:45 - INFO - Tokens per second: 37.616265679442, Peak GPU memory MB: 9378.375 +2025-08-20 22:31:45 - INFO - [54f8cc9f-9a1a-4295-a7eb-8072b859e4cc] Inference time: 23.60 seconds, CPU usage: 79.2%, CPU core utilization: [81.4, 72.9, 75.7, 86.5] +2025-08-20 22:31:45 - INFO - [54f8cc9f-9a1a-4295-a7eb-8072b859e4cc] Cleaned up temporary file: temp_videos/54f8cc9f-9a1a-4295-a7eb-8072b859e4cc.mp4 +2025-08-20 22:31:45 - INFO - [54f8cc9f-9a1a-4295-a7eb-8072b859e4cc] Cleaned up temporary frame directory: temp_videos/54f8cc9f-9a1a-4295-a7eb-8072b859e4cc +2025-08-20 22:31:45 - INFO - [b28f6588-dffd-4301-af64-e07895a357bd] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_014.mp4' +2025-08-20 22:31:45 - INFO - [b28f6588-dffd-4301-af64-e07895a357bd] Video saved to temporary file: temp_videos/b28f6588-dffd-4301-af64-e07895a357bd.mp4 +2025-08-20 22:31:45 - INFO - [b28f6588-dffd-4301-af64-e07895a357bd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:31:52 - INFO - [b28f6588-dffd-4301-af64-e07895a357bd] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:31:52 - INFO - [b28f6588-dffd-4301-af64-e07895a357bd] 30 frames saved to temp_videos/b28f6588-dffd-4301-af64-e07895a357bd +2025-08-20 22:31:53 - INFO - Prompt token length: 3604 +2025-08-20 22:32:10 - INFO - Tokens per second: 37.28115717917145, Peak GPU memory MB: 9378.375 +2025-08-20 22:32:10 - INFO - [b28f6588-dffd-4301-af64-e07895a357bd] Inference time: 24.27 seconds, CPU usage: 78.4%, CPU core utilization: [75.0, 67.1, 82.7, 88.7] +2025-08-20 22:32:10 - INFO - [b28f6588-dffd-4301-af64-e07895a357bd] Cleaned up temporary file: temp_videos/b28f6588-dffd-4301-af64-e07895a357bd.mp4 +2025-08-20 22:32:10 - INFO - [b28f6588-dffd-4301-af64-e07895a357bd] Cleaned up temporary frame directory: temp_videos/b28f6588-dffd-4301-af64-e07895a357bd +2025-08-20 22:32:10 - INFO - [853f6874-3c45-41a1-b1d9-a2a738b3bfcc] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_015.mp4' +2025-08-20 22:32:10 - INFO - [853f6874-3c45-41a1-b1d9-a2a738b3bfcc] Video saved to temporary file: temp_videos/853f6874-3c45-41a1-b1d9-a2a738b3bfcc.mp4 +2025-08-20 22:32:10 - INFO - [853f6874-3c45-41a1-b1d9-a2a738b3bfcc] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:32:17 - INFO - [853f6874-3c45-41a1-b1d9-a2a738b3bfcc] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:32:17 - INFO - [853f6874-3c45-41a1-b1d9-a2a738b3bfcc] 30 frames saved to temp_videos/853f6874-3c45-41a1-b1d9-a2a738b3bfcc +2025-08-20 22:32:17 - INFO - Prompt token length: 3604 +2025-08-20 22:32:37 - INFO - Tokens per second: 39.86888270261289, Peak GPU memory MB: 9378.375 +2025-08-20 22:32:37 - INFO - [853f6874-3c45-41a1-b1d9-a2a738b3bfcc] Inference time: 27.15 seconds, CPU usage: 75.9%, CPU core utilization: [79.5, 73.9, 74.9, 75.2] +2025-08-20 22:32:37 - INFO - [853f6874-3c45-41a1-b1d9-a2a738b3bfcc] Cleaned up temporary file: temp_videos/853f6874-3c45-41a1-b1d9-a2a738b3bfcc.mp4 +2025-08-20 22:32:37 - INFO - [853f6874-3c45-41a1-b1d9-a2a738b3bfcc] Cleaned up temporary frame directory: temp_videos/853f6874-3c45-41a1-b1d9-a2a738b3bfcc +2025-08-20 22:32:37 - INFO - [66be1528-862a-4abf-8c42-4a366dace799] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_015.mp4' +2025-08-20 22:32:37 - INFO - [66be1528-862a-4abf-8c42-4a366dace799] Video saved to temporary file: temp_videos/66be1528-862a-4abf-8c42-4a366dace799.mp4 +2025-08-20 22:32:37 - INFO - [66be1528-862a-4abf-8c42-4a366dace799] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:32:45 - INFO - [66be1528-862a-4abf-8c42-4a366dace799] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:32:45 - INFO - [66be1528-862a-4abf-8c42-4a366dace799] 30 frames saved to temp_videos/66be1528-862a-4abf-8c42-4a366dace799 +2025-08-20 22:32:45 - INFO - Prompt token length: 3604 +2025-08-20 22:33:06 - INFO - Tokens per second: 39.45218673810312, Peak GPU memory MB: 9378.375 +2025-08-20 22:33:06 - INFO - [66be1528-862a-4abf-8c42-4a366dace799] Inference time: 28.37 seconds, CPU usage: 77.8%, CPU core utilization: [72.5, 73.4, 81.9, 83.0] +2025-08-20 22:33:06 - INFO - [66be1528-862a-4abf-8c42-4a366dace799] Cleaned up temporary file: temp_videos/66be1528-862a-4abf-8c42-4a366dace799.mp4 +2025-08-20 22:33:06 - INFO - [66be1528-862a-4abf-8c42-4a366dace799] Cleaned up temporary frame directory: temp_videos/66be1528-862a-4abf-8c42-4a366dace799 +2025-08-20 22:33:06 - INFO - [071144ce-0c39-4716-88c7-b0b1e4658f0e] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_016.mp4' +2025-08-20 22:33:06 - INFO - [071144ce-0c39-4716-88c7-b0b1e4658f0e] Video saved to temporary file: temp_videos/071144ce-0c39-4716-88c7-b0b1e4658f0e.mp4 +2025-08-20 22:33:06 - INFO - [071144ce-0c39-4716-88c7-b0b1e4658f0e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:33:14 - INFO - [071144ce-0c39-4716-88c7-b0b1e4658f0e] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:33:14 - INFO - [071144ce-0c39-4716-88c7-b0b1e4658f0e] 30 frames saved to temp_videos/071144ce-0c39-4716-88c7-b0b1e4658f0e +2025-08-20 22:33:14 - INFO - Prompt token length: 3604 +2025-08-20 22:33:32 - INFO - Tokens per second: 41.188979826273595, Peak GPU memory MB: 9378.375 +2025-08-20 22:33:32 - INFO - [071144ce-0c39-4716-88c7-b0b1e4658f0e] Inference time: 26.26 seconds, CPU usage: 80.6%, CPU core utilization: [86.1, 88.6, 75.0, 72.8] +2025-08-20 22:33:32 - INFO - [071144ce-0c39-4716-88c7-b0b1e4658f0e] Cleaned up temporary file: temp_videos/071144ce-0c39-4716-88c7-b0b1e4658f0e.mp4 +2025-08-20 22:33:32 - INFO - [071144ce-0c39-4716-88c7-b0b1e4658f0e] Cleaned up temporary frame directory: temp_videos/071144ce-0c39-4716-88c7-b0b1e4658f0e +2025-08-20 22:33:32 - INFO - [f9a44b60-82c4-445d-a8e5-320f739e023e] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_016.mp4' +2025-08-20 22:33:32 - INFO - [f9a44b60-82c4-445d-a8e5-320f739e023e] Video saved to temporary file: temp_videos/f9a44b60-82c4-445d-a8e5-320f739e023e.mp4 +2025-08-20 22:33:32 - INFO - [f9a44b60-82c4-445d-a8e5-320f739e023e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:33:40 - INFO - [f9a44b60-82c4-445d-a8e5-320f739e023e] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:33:40 - INFO - [f9a44b60-82c4-445d-a8e5-320f739e023e] 30 frames saved to temp_videos/f9a44b60-82c4-445d-a8e5-320f739e023e +2025-08-20 22:33:40 - INFO - Prompt token length: 3604 +2025-08-20 22:33:58 - INFO - Tokens per second: 39.37396963384792, Peak GPU memory MB: 9378.375 +2025-08-20 22:33:58 - INFO - [f9a44b60-82c4-445d-a8e5-320f739e023e] Inference time: 26.18 seconds, CPU usage: 77.8%, CPU core utilization: [76.8, 70.1, 78.0, 86.3] +2025-08-20 22:33:58 - INFO - [f9a44b60-82c4-445d-a8e5-320f739e023e] Cleaned up temporary file: temp_videos/f9a44b60-82c4-445d-a8e5-320f739e023e.mp4 +2025-08-20 22:33:58 - INFO - [f9a44b60-82c4-445d-a8e5-320f739e023e] Cleaned up temporary frame directory: temp_videos/f9a44b60-82c4-445d-a8e5-320f739e023e +2025-08-20 22:33:59 - INFO - [bff55645-4f57-49ee-8725-051114a14816] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_017.mp4' +2025-08-20 22:33:59 - INFO - [bff55645-4f57-49ee-8725-051114a14816] Video saved to temporary file: temp_videos/bff55645-4f57-49ee-8725-051114a14816.mp4 +2025-08-20 22:33:59 - INFO - [bff55645-4f57-49ee-8725-051114a14816] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:34:05 - INFO - [bff55645-4f57-49ee-8725-051114a14816] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:34:05 - INFO - [bff55645-4f57-49ee-8725-051114a14816] 30 frames saved to temp_videos/bff55645-4f57-49ee-8725-051114a14816 +2025-08-20 22:34:06 - INFO - Prompt token length: 3604 +2025-08-20 22:34:24 - INFO - Tokens per second: 40.829451790231566, Peak GPU memory MB: 9378.375 +2025-08-20 22:34:24 - INFO - [bff55645-4f57-49ee-8725-051114a14816] Inference time: 24.92 seconds, CPU usage: 79.1%, CPU core utilization: [85.1, 85.4, 73.6, 72.2] +2025-08-20 22:34:24 - INFO - [bff55645-4f57-49ee-8725-051114a14816] Cleaned up temporary file: temp_videos/bff55645-4f57-49ee-8725-051114a14816.mp4 +2025-08-20 22:34:24 - INFO - [bff55645-4f57-49ee-8725-051114a14816] Cleaned up temporary frame directory: temp_videos/bff55645-4f57-49ee-8725-051114a14816 +2025-08-20 22:34:24 - INFO - [42e5904b-710c-44d5-a60d-20efcce3b62d] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_017.mp4' +2025-08-20 22:34:24 - INFO - [42e5904b-710c-44d5-a60d-20efcce3b62d] Video saved to temporary file: temp_videos/42e5904b-710c-44d5-a60d-20efcce3b62d.mp4 +2025-08-20 22:34:24 - INFO - [42e5904b-710c-44d5-a60d-20efcce3b62d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:34:30 - INFO - [42e5904b-710c-44d5-a60d-20efcce3b62d] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:34:30 - INFO - [42e5904b-710c-44d5-a60d-20efcce3b62d] 30 frames saved to temp_videos/42e5904b-710c-44d5-a60d-20efcce3b62d +2025-08-20 22:34:31 - INFO - Prompt token length: 3604 +2025-08-20 22:34:49 - INFO - Tokens per second: 38.884255600661035, Peak GPU memory MB: 9378.375 +2025-08-20 22:34:49 - INFO - [42e5904b-710c-44d5-a60d-20efcce3b62d] Inference time: 24.90 seconds, CPU usage: 83.2%, CPU core utilization: [85.3, 81.4, 80.7, 85.5] +2025-08-20 22:34:49 - INFO - [42e5904b-710c-44d5-a60d-20efcce3b62d] Cleaned up temporary file: temp_videos/42e5904b-710c-44d5-a60d-20efcce3b62d.mp4 +2025-08-20 22:34:49 - INFO - [42e5904b-710c-44d5-a60d-20efcce3b62d] Cleaned up temporary frame directory: temp_videos/42e5904b-710c-44d5-a60d-20efcce3b62d +2025-08-20 22:34:49 - INFO - [74f1be80-c5b0-49fe-b669-79448eb5c43b] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_018.mp4' +2025-08-20 22:34:49 - INFO - [74f1be80-c5b0-49fe-b669-79448eb5c43b] Video saved to temporary file: temp_videos/74f1be80-c5b0-49fe-b669-79448eb5c43b.mp4 +2025-08-20 22:34:49 - INFO - [74f1be80-c5b0-49fe-b669-79448eb5c43b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:34:55 - INFO - [74f1be80-c5b0-49fe-b669-79448eb5c43b] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:34:55 - INFO - [74f1be80-c5b0-49fe-b669-79448eb5c43b] 30 frames saved to temp_videos/74f1be80-c5b0-49fe-b669-79448eb5c43b +2025-08-20 22:34:56 - INFO - Prompt token length: 3604 +2025-08-20 22:35:14 - INFO - Tokens per second: 39.540108007028685, Peak GPU memory MB: 9378.375 +2025-08-20 22:35:14 - INFO - [74f1be80-c5b0-49fe-b669-79448eb5c43b] Inference time: 25.35 seconds, CPU usage: 76.2%, CPU core utilization: [80.1, 65.1, 71.7, 87.9] +2025-08-20 22:35:14 - INFO - [74f1be80-c5b0-49fe-b669-79448eb5c43b] Cleaned up temporary file: temp_videos/74f1be80-c5b0-49fe-b669-79448eb5c43b.mp4 +2025-08-20 22:35:14 - INFO - [74f1be80-c5b0-49fe-b669-79448eb5c43b] Cleaned up temporary frame directory: temp_videos/74f1be80-c5b0-49fe-b669-79448eb5c43b +2025-08-20 22:35:15 - INFO - [4abff9bb-803b-4789-8cdf-a87b71107b8a] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_018.mp4' +2025-08-20 22:35:15 - INFO - [4abff9bb-803b-4789-8cdf-a87b71107b8a] Video saved to temporary file: temp_videos/4abff9bb-803b-4789-8cdf-a87b71107b8a.mp4 +2025-08-20 22:35:15 - INFO - [4abff9bb-803b-4789-8cdf-a87b71107b8a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:35:22 - INFO - [4abff9bb-803b-4789-8cdf-a87b71107b8a] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:35:22 - INFO - [4abff9bb-803b-4789-8cdf-a87b71107b8a] 30 frames saved to temp_videos/4abff9bb-803b-4789-8cdf-a87b71107b8a +2025-08-20 22:35:23 - INFO - Prompt token length: 3604 +2025-08-20 22:35:41 - INFO - Tokens per second: 38.10626565587464, Peak GPU memory MB: 9378.375 +2025-08-20 22:35:41 - INFO - [4abff9bb-803b-4789-8cdf-a87b71107b8a] Inference time: 26.76 seconds, CPU usage: 77.1%, CPU core utilization: [69.9, 75.1, 84.7, 78.9] +2025-08-20 22:35:41 - INFO - [4abff9bb-803b-4789-8cdf-a87b71107b8a] Cleaned up temporary file: temp_videos/4abff9bb-803b-4789-8cdf-a87b71107b8a.mp4 +2025-08-20 22:35:41 - INFO - [4abff9bb-803b-4789-8cdf-a87b71107b8a] Cleaned up temporary frame directory: temp_videos/4abff9bb-803b-4789-8cdf-a87b71107b8a +2025-08-20 22:35:42 - INFO - [79b4b617-47dc-4863-a4b9-3c2536a5cd7d] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_019.mp4' +2025-08-20 22:35:42 - INFO - [79b4b617-47dc-4863-a4b9-3c2536a5cd7d] Video saved to temporary file: temp_videos/79b4b617-47dc-4863-a4b9-3c2536a5cd7d.mp4 +2025-08-20 22:35:42 - INFO - [79b4b617-47dc-4863-a4b9-3c2536a5cd7d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:35:49 - INFO - [79b4b617-47dc-4863-a4b9-3c2536a5cd7d] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:35:49 - INFO - [79b4b617-47dc-4863-a4b9-3c2536a5cd7d] 30 frames saved to temp_videos/79b4b617-47dc-4863-a4b9-3c2536a5cd7d +2025-08-20 22:35:49 - INFO - Prompt token length: 3604 +2025-08-20 22:36:09 - INFO - Tokens per second: 40.695375287382426, Peak GPU memory MB: 9378.375 +2025-08-20 22:36:09 - INFO - [79b4b617-47dc-4863-a4b9-3c2536a5cd7d] Inference time: 26.98 seconds, CPU usage: 73.6%, CPU core utilization: [66.1, 75.3, 82.2, 71.1] +2025-08-20 22:36:09 - INFO - [79b4b617-47dc-4863-a4b9-3c2536a5cd7d] Cleaned up temporary file: temp_videos/79b4b617-47dc-4863-a4b9-3c2536a5cd7d.mp4 +2025-08-20 22:36:09 - INFO - [79b4b617-47dc-4863-a4b9-3c2536a5cd7d] Cleaned up temporary frame directory: temp_videos/79b4b617-47dc-4863-a4b9-3c2536a5cd7d +2025-08-20 22:36:09 - INFO - [f79ea27a-5f36-4c6f-a78f-4ef7415042f4] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_019.mp4' +2025-08-20 22:36:09 - INFO - [f79ea27a-5f36-4c6f-a78f-4ef7415042f4] Video saved to temporary file: temp_videos/f79ea27a-5f36-4c6f-a78f-4ef7415042f4.mp4 +2025-08-20 22:36:09 - INFO - [f79ea27a-5f36-4c6f-a78f-4ef7415042f4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:36:17 - INFO - [f79ea27a-5f36-4c6f-a78f-4ef7415042f4] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:36:17 - INFO - [f79ea27a-5f36-4c6f-a78f-4ef7415042f4] 30 frames saved to temp_videos/f79ea27a-5f36-4c6f-a78f-4ef7415042f4 +2025-08-20 22:36:18 - INFO - Prompt token length: 3604 +2025-08-20 22:36:38 - INFO - Tokens per second: 37.79767195865447, Peak GPU memory MB: 9378.375 +2025-08-20 22:36:38 - INFO - [f79ea27a-5f36-4c6f-a78f-4ef7415042f4] Inference time: 28.79 seconds, CPU usage: 77.8%, CPU core utilization: [84.9, 72.4, 69.3, 84.7] +2025-08-20 22:36:38 - INFO - [f79ea27a-5f36-4c6f-a78f-4ef7415042f4] Cleaned up temporary file: temp_videos/f79ea27a-5f36-4c6f-a78f-4ef7415042f4.mp4 +2025-08-20 22:36:38 - INFO - [f79ea27a-5f36-4c6f-a78f-4ef7415042f4] Cleaned up temporary frame directory: temp_videos/f79ea27a-5f36-4c6f-a78f-4ef7415042f4 +2025-08-20 22:36:38 - INFO - [73fa4fbe-2530-4b5c-91f5-9f6a842a8f7d] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_020.mp4' +2025-08-20 22:36:38 - INFO - [73fa4fbe-2530-4b5c-91f5-9f6a842a8f7d] Video saved to temporary file: temp_videos/73fa4fbe-2530-4b5c-91f5-9f6a842a8f7d.mp4 +2025-08-20 22:36:38 - INFO - [73fa4fbe-2530-4b5c-91f5-9f6a842a8f7d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:36:45 - INFO - [73fa4fbe-2530-4b5c-91f5-9f6a842a8f7d] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:36:45 - INFO - [73fa4fbe-2530-4b5c-91f5-9f6a842a8f7d] 30 frames saved to temp_videos/73fa4fbe-2530-4b5c-91f5-9f6a842a8f7d +2025-08-20 22:36:46 - INFO - Prompt token length: 3604 +2025-08-20 22:37:03 - INFO - Tokens per second: 39.61199438135451, Peak GPU memory MB: 9378.375 +2025-08-20 22:37:03 - INFO - [73fa4fbe-2530-4b5c-91f5-9f6a842a8f7d] Inference time: 25.21 seconds, CPU usage: 78.1%, CPU core utilization: [82.2, 64.5, 71.5, 94.0] +2025-08-20 22:37:03 - INFO - [73fa4fbe-2530-4b5c-91f5-9f6a842a8f7d] Cleaned up temporary file: temp_videos/73fa4fbe-2530-4b5c-91f5-9f6a842a8f7d.mp4 +2025-08-20 22:37:03 - INFO - [73fa4fbe-2530-4b5c-91f5-9f6a842a8f7d] Cleaned up temporary frame directory: temp_videos/73fa4fbe-2530-4b5c-91f5-9f6a842a8f7d +2025-08-20 22:37:03 - INFO - [09d554f9-7e98-4e05-8aa7-11da1fab4ac5] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_020.mp4' +2025-08-20 22:37:03 - INFO - [09d554f9-7e98-4e05-8aa7-11da1fab4ac5] Video saved to temporary file: temp_videos/09d554f9-7e98-4e05-8aa7-11da1fab4ac5.mp4 +2025-08-20 22:37:03 - INFO - [09d554f9-7e98-4e05-8aa7-11da1fab4ac5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:37:10 - INFO - [09d554f9-7e98-4e05-8aa7-11da1fab4ac5] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:37:10 - INFO - [09d554f9-7e98-4e05-8aa7-11da1fab4ac5] 30 frames saved to temp_videos/09d554f9-7e98-4e05-8aa7-11da1fab4ac5 +2025-08-20 22:37:11 - INFO - Prompt token length: 3604 +2025-08-20 22:37:29 - INFO - Tokens per second: 38.21252206264074, Peak GPU memory MB: 9378.375 +2025-08-20 22:37:29 - INFO - [09d554f9-7e98-4e05-8aa7-11da1fab4ac5] Inference time: 25.31 seconds, CPU usage: 81.7%, CPU core utilization: [82.3, 75.8, 79.9, 88.8] +2025-08-20 22:37:29 - INFO - [09d554f9-7e98-4e05-8aa7-11da1fab4ac5] Cleaned up temporary file: temp_videos/09d554f9-7e98-4e05-8aa7-11da1fab4ac5.mp4 +2025-08-20 22:37:29 - INFO - [09d554f9-7e98-4e05-8aa7-11da1fab4ac5] Cleaned up temporary frame directory: temp_videos/09d554f9-7e98-4e05-8aa7-11da1fab4ac5 +2025-08-20 22:37:29 - INFO - [3622e9e8-28ea-4e4c-b570-38df953aaf9f] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_021.mp4' +2025-08-20 22:37:29 - INFO - [3622e9e8-28ea-4e4c-b570-38df953aaf9f] Video saved to temporary file: temp_videos/3622e9e8-28ea-4e4c-b570-38df953aaf9f.mp4 +2025-08-20 22:37:29 - INFO - [3622e9e8-28ea-4e4c-b570-38df953aaf9f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:37:36 - INFO - [3622e9e8-28ea-4e4c-b570-38df953aaf9f] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:37:36 - INFO - [3622e9e8-28ea-4e4c-b570-38df953aaf9f] 30 frames saved to temp_videos/3622e9e8-28ea-4e4c-b570-38df953aaf9f +2025-08-20 22:37:37 - INFO - Prompt token length: 3604 +2025-08-20 22:37:59 - INFO - Tokens per second: 38.79660135730641, Peak GPU memory MB: 9378.375 +2025-08-20 22:37:59 - INFO - [3622e9e8-28ea-4e4c-b570-38df953aaf9f] Inference time: 30.01 seconds, CPU usage: 73.4%, CPU core utilization: [75.8, 60.0, 69.3, 88.6] +2025-08-20 22:37:59 - INFO - [3622e9e8-28ea-4e4c-b570-38df953aaf9f] Cleaned up temporary file: temp_videos/3622e9e8-28ea-4e4c-b570-38df953aaf9f.mp4 +2025-08-20 22:37:59 - INFO - [3622e9e8-28ea-4e4c-b570-38df953aaf9f] Cleaned up temporary frame directory: temp_videos/3622e9e8-28ea-4e4c-b570-38df953aaf9f +2025-08-20 22:37:59 - INFO - [220a22a6-3f88-431d-abb1-5fe0cd58a911] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_021.mp4' +2025-08-20 22:37:59 - INFO - [220a22a6-3f88-431d-abb1-5fe0cd58a911] Video saved to temporary file: temp_videos/220a22a6-3f88-431d-abb1-5fe0cd58a911.mp4 +2025-08-20 22:37:59 - INFO - [220a22a6-3f88-431d-abb1-5fe0cd58a911] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:38:07 - INFO - [220a22a6-3f88-431d-abb1-5fe0cd58a911] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:38:07 - INFO - [220a22a6-3f88-431d-abb1-5fe0cd58a911] 30 frames saved to temp_videos/220a22a6-3f88-431d-abb1-5fe0cd58a911 +2025-08-20 22:38:07 - INFO - Prompt token length: 3604 +2025-08-20 22:38:29 - INFO - Tokens per second: 39.54199949017956, Peak GPU memory MB: 9378.375 +2025-08-20 22:38:29 - INFO - [220a22a6-3f88-431d-abb1-5fe0cd58a911] Inference time: 30.05 seconds, CPU usage: 73.8%, CPU core utilization: [77.7, 70.0, 68.1, 79.3] +2025-08-20 22:38:29 - INFO - [220a22a6-3f88-431d-abb1-5fe0cd58a911] Cleaned up temporary file: temp_videos/220a22a6-3f88-431d-abb1-5fe0cd58a911.mp4 +2025-08-20 22:38:29 - INFO - [220a22a6-3f88-431d-abb1-5fe0cd58a911] Cleaned up temporary frame directory: temp_videos/220a22a6-3f88-431d-abb1-5fe0cd58a911 +2025-08-20 22:38:29 - INFO - [39eff279-d88f-41db-bc04-a620534dd1f6] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_022.mp4' +2025-08-20 22:38:29 - INFO - [39eff279-d88f-41db-bc04-a620534dd1f6] Video saved to temporary file: temp_videos/39eff279-d88f-41db-bc04-a620534dd1f6.mp4 +2025-08-20 22:38:29 - INFO - [39eff279-d88f-41db-bc04-a620534dd1f6] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:38:36 - INFO - [39eff279-d88f-41db-bc04-a620534dd1f6] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:38:36 - INFO - [39eff279-d88f-41db-bc04-a620534dd1f6] 30 frames saved to temp_videos/39eff279-d88f-41db-bc04-a620534dd1f6 +2025-08-20 22:38:37 - INFO - Prompt token length: 3604 +2025-08-20 22:38:59 - INFO - Tokens per second: 39.230858988985254, Peak GPU memory MB: 9378.375 +2025-08-20 22:38:59 - INFO - [39eff279-d88f-41db-bc04-a620534dd1f6] Inference time: 29.67 seconds, CPU usage: 76.4%, CPU core utilization: [74.1, 61.5, 77.9, 92.1] +2025-08-20 22:38:59 - INFO - [39eff279-d88f-41db-bc04-a620534dd1f6] Cleaned up temporary file: temp_videos/39eff279-d88f-41db-bc04-a620534dd1f6.mp4 +2025-08-20 22:38:59 - INFO - [39eff279-d88f-41db-bc04-a620534dd1f6] Cleaned up temporary frame directory: temp_videos/39eff279-d88f-41db-bc04-a620534dd1f6 +2025-08-20 22:38:59 - INFO - [96fa5647-4edc-4b1e-957e-04c63a7a7a32] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_022.mp4' +2025-08-20 22:38:59 - INFO - [96fa5647-4edc-4b1e-957e-04c63a7a7a32] Video saved to temporary file: temp_videos/96fa5647-4edc-4b1e-957e-04c63a7a7a32.mp4 +2025-08-20 22:38:59 - INFO - [96fa5647-4edc-4b1e-957e-04c63a7a7a32] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:39:05 - INFO - [96fa5647-4edc-4b1e-957e-04c63a7a7a32] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:39:06 - INFO - [96fa5647-4edc-4b1e-957e-04c63a7a7a32] 30 frames saved to temp_videos/96fa5647-4edc-4b1e-957e-04c63a7a7a32 +2025-08-20 22:39:06 - INFO - Prompt token length: 3604 +2025-08-20 22:39:28 - INFO - Tokens per second: 39.54947543028791, Peak GPU memory MB: 9378.375 +2025-08-20 22:39:28 - INFO - [96fa5647-4edc-4b1e-957e-04c63a7a7a32] Inference time: 28.73 seconds, CPU usage: 79.0%, CPU core utilization: [77.2, 73.8, 80.5, 84.6] +2025-08-20 22:39:28 - INFO - [96fa5647-4edc-4b1e-957e-04c63a7a7a32] Cleaned up temporary file: temp_videos/96fa5647-4edc-4b1e-957e-04c63a7a7a32.mp4 +2025-08-20 22:39:28 - INFO - [96fa5647-4edc-4b1e-957e-04c63a7a7a32] Cleaned up temporary frame directory: temp_videos/96fa5647-4edc-4b1e-957e-04c63a7a7a32 +2025-08-20 22:39:28 - INFO - [adedb259-ffef-494b-9fa8-46d7a7d9436b] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_023.mp4' +2025-08-20 22:39:28 - INFO - [adedb259-ffef-494b-9fa8-46d7a7d9436b] Video saved to temporary file: temp_videos/adedb259-ffef-494b-9fa8-46d7a7d9436b.mp4 +2025-08-20 22:39:28 - INFO - [adedb259-ffef-494b-9fa8-46d7a7d9436b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:39:34 - INFO - [adedb259-ffef-494b-9fa8-46d7a7d9436b] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:39:34 - INFO - [adedb259-ffef-494b-9fa8-46d7a7d9436b] 30 frames saved to temp_videos/adedb259-ffef-494b-9fa8-46d7a7d9436b +2025-08-20 22:39:35 - INFO - Prompt token length: 3604 +2025-08-20 22:39:55 - INFO - Tokens per second: 40.91406490564467, Peak GPU memory MB: 9378.375 +2025-08-20 22:39:55 - INFO - [adedb259-ffef-494b-9fa8-46d7a7d9436b] Inference time: 26.68 seconds, CPU usage: 77.2%, CPU core utilization: [76.4, 80.0, 79.4, 73.1] +2025-08-20 22:39:55 - INFO - [adedb259-ffef-494b-9fa8-46d7a7d9436b] Cleaned up temporary file: temp_videos/adedb259-ffef-494b-9fa8-46d7a7d9436b.mp4 +2025-08-20 22:39:55 - INFO - [adedb259-ffef-494b-9fa8-46d7a7d9436b] Cleaned up temporary frame directory: temp_videos/adedb259-ffef-494b-9fa8-46d7a7d9436b +2025-08-20 22:39:55 - INFO - [7caf9ce9-1fbe-40c3-9ac5-30fbf769f132] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_023.mp4' +2025-08-20 22:39:55 - INFO - [7caf9ce9-1fbe-40c3-9ac5-30fbf769f132] Video saved to temporary file: temp_videos/7caf9ce9-1fbe-40c3-9ac5-30fbf769f132.mp4 +2025-08-20 22:39:55 - INFO - [7caf9ce9-1fbe-40c3-9ac5-30fbf769f132] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:40:03 - INFO - [7caf9ce9-1fbe-40c3-9ac5-30fbf769f132] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:40:03 - INFO - [7caf9ce9-1fbe-40c3-9ac5-30fbf769f132] 30 frames saved to temp_videos/7caf9ce9-1fbe-40c3-9ac5-30fbf769f132 +2025-08-20 22:40:03 - INFO - Prompt token length: 3604 +2025-08-20 22:40:23 - INFO - Tokens per second: 39.12818796943181, Peak GPU memory MB: 9378.375 +2025-08-20 22:40:23 - INFO - [7caf9ce9-1fbe-40c3-9ac5-30fbf769f132] Inference time: 28.12 seconds, CPU usage: 78.2%, CPU core utilization: [69.3, 77.7, 88.1, 77.7] +2025-08-20 22:40:23 - INFO - [7caf9ce9-1fbe-40c3-9ac5-30fbf769f132] Cleaned up temporary file: temp_videos/7caf9ce9-1fbe-40c3-9ac5-30fbf769f132.mp4 +2025-08-20 22:40:23 - INFO - [7caf9ce9-1fbe-40c3-9ac5-30fbf769f132] Cleaned up temporary frame directory: temp_videos/7caf9ce9-1fbe-40c3-9ac5-30fbf769f132 +2025-08-20 22:40:23 - INFO - [ff693e7b-6913-4b31-bf2b-acd38601813e] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_024.mp4' +2025-08-20 22:40:23 - INFO - [ff693e7b-6913-4b31-bf2b-acd38601813e] Video saved to temporary file: temp_videos/ff693e7b-6913-4b31-bf2b-acd38601813e.mp4 +2025-08-20 22:40:23 - INFO - [ff693e7b-6913-4b31-bf2b-acd38601813e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:40:30 - INFO - [ff693e7b-6913-4b31-bf2b-acd38601813e] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:40:30 - INFO - [ff693e7b-6913-4b31-bf2b-acd38601813e] 30 frames saved to temp_videos/ff693e7b-6913-4b31-bf2b-acd38601813e +2025-08-20 22:40:30 - INFO - Prompt token length: 3604 +2025-08-20 22:40:49 - INFO - Tokens per second: 40.69113410208588, Peak GPU memory MB: 9378.375 +2025-08-20 22:40:49 - INFO - [ff693e7b-6913-4b31-bf2b-acd38601813e] Inference time: 25.75 seconds, CPU usage: 77.4%, CPU core utilization: [74.5, 66.0, 79.8, 89.1] +2025-08-20 22:40:49 - INFO - [ff693e7b-6913-4b31-bf2b-acd38601813e] Cleaned up temporary file: temp_videos/ff693e7b-6913-4b31-bf2b-acd38601813e.mp4 +2025-08-20 22:40:49 - INFO - [ff693e7b-6913-4b31-bf2b-acd38601813e] Cleaned up temporary frame directory: temp_videos/ff693e7b-6913-4b31-bf2b-acd38601813e +2025-08-20 22:40:49 - INFO - [5b91e958-1746-43b3-86bb-07e19383d4a8] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_024.mp4' +2025-08-20 22:40:49 - INFO - [5b91e958-1746-43b3-86bb-07e19383d4a8] Video saved to temporary file: temp_videos/5b91e958-1746-43b3-86bb-07e19383d4a8.mp4 +2025-08-20 22:40:49 - INFO - [5b91e958-1746-43b3-86bb-07e19383d4a8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:40:57 - INFO - [5b91e958-1746-43b3-86bb-07e19383d4a8] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:40:57 - INFO - [5b91e958-1746-43b3-86bb-07e19383d4a8] 30 frames saved to temp_videos/5b91e958-1746-43b3-86bb-07e19383d4a8 +2025-08-20 22:40:57 - INFO - Prompt token length: 3604 +2025-08-20 22:41:16 - INFO - Tokens per second: 39.941217640686915, Peak GPU memory MB: 9378.375 +2025-08-20 22:41:16 - INFO - [5b91e958-1746-43b3-86bb-07e19383d4a8] Inference time: 26.89 seconds, CPU usage: 78.8%, CPU core utilization: [83.5, 79.6, 73.1, 79.2] +2025-08-20 22:41:16 - INFO - [5b91e958-1746-43b3-86bb-07e19383d4a8] Cleaned up temporary file: temp_videos/5b91e958-1746-43b3-86bb-07e19383d4a8.mp4 +2025-08-20 22:41:16 - INFO - [5b91e958-1746-43b3-86bb-07e19383d4a8] Cleaned up temporary frame directory: temp_videos/5b91e958-1746-43b3-86bb-07e19383d4a8 +2025-08-20 22:41:16 - INFO - [95296555-f7d5-42a4-a9bd-09a237cf3307] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_025.mp4' +2025-08-20 22:41:16 - INFO - [95296555-f7d5-42a4-a9bd-09a237cf3307] Video saved to temporary file: temp_videos/95296555-f7d5-42a4-a9bd-09a237cf3307.mp4 +2025-08-20 22:41:16 - INFO - [95296555-f7d5-42a4-a9bd-09a237cf3307] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:41:23 - INFO - [95296555-f7d5-42a4-a9bd-09a237cf3307] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:41:23 - INFO - [95296555-f7d5-42a4-a9bd-09a237cf3307] 30 frames saved to temp_videos/95296555-f7d5-42a4-a9bd-09a237cf3307 +2025-08-20 22:41:23 - INFO - Prompt token length: 3604 +2025-08-20 22:41:43 - INFO - Tokens per second: 39.45076495853286, Peak GPU memory MB: 9378.375 +2025-08-20 22:41:43 - INFO - [95296555-f7d5-42a4-a9bd-09a237cf3307] Inference time: 26.88 seconds, CPU usage: 78.6%, CPU core utilization: [73.0, 82.5, 84.3, 74.7] +2025-08-20 22:41:43 - INFO - [95296555-f7d5-42a4-a9bd-09a237cf3307] Cleaned up temporary file: temp_videos/95296555-f7d5-42a4-a9bd-09a237cf3307.mp4 +2025-08-20 22:41:43 - INFO - [95296555-f7d5-42a4-a9bd-09a237cf3307] Cleaned up temporary frame directory: temp_videos/95296555-f7d5-42a4-a9bd-09a237cf3307 +2025-08-20 22:41:43 - INFO - [48a01780-bac8-4bc4-b674-960e500a485b] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_025.mp4' +2025-08-20 22:41:43 - INFO - [48a01780-bac8-4bc4-b674-960e500a485b] Video saved to temporary file: temp_videos/48a01780-bac8-4bc4-b674-960e500a485b.mp4 +2025-08-20 22:41:43 - INFO - [48a01780-bac8-4bc4-b674-960e500a485b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:41:51 - INFO - [48a01780-bac8-4bc4-b674-960e500a485b] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:41:51 - INFO - [48a01780-bac8-4bc4-b674-960e500a485b] 30 frames saved to temp_videos/48a01780-bac8-4bc4-b674-960e500a485b +2025-08-20 22:41:51 - INFO - Prompt token length: 3604 +2025-08-20 22:42:11 - INFO - Tokens per second: 40.173796211251464, Peak GPU memory MB: 9378.375 +2025-08-20 22:42:11 - INFO - [48a01780-bac8-4bc4-b674-960e500a485b] Inference time: 27.59 seconds, CPU usage: 77.7%, CPU core utilization: [82.9, 62.6, 68.5, 96.5] +2025-08-20 22:42:11 - INFO - [48a01780-bac8-4bc4-b674-960e500a485b] Cleaned up temporary file: temp_videos/48a01780-bac8-4bc4-b674-960e500a485b.mp4 +2025-08-20 22:42:11 - INFO - [48a01780-bac8-4bc4-b674-960e500a485b] Cleaned up temporary frame directory: temp_videos/48a01780-bac8-4bc4-b674-960e500a485b +2025-08-20 22:42:11 - INFO - [10ca2e39-2518-4d95-a25c-3614c6d24b60] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_026.mp4' +2025-08-20 22:42:11 - INFO - [10ca2e39-2518-4d95-a25c-3614c6d24b60] Video saved to temporary file: temp_videos/10ca2e39-2518-4d95-a25c-3614c6d24b60.mp4 +2025-08-20 22:42:11 - INFO - [10ca2e39-2518-4d95-a25c-3614c6d24b60] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:42:18 - INFO - [10ca2e39-2518-4d95-a25c-3614c6d24b60] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:42:18 - INFO - [10ca2e39-2518-4d95-a25c-3614c6d24b60] 30 frames saved to temp_videos/10ca2e39-2518-4d95-a25c-3614c6d24b60 +2025-08-20 22:42:18 - INFO - Prompt token length: 3604 +2025-08-20 22:42:35 - INFO - Tokens per second: 40.88995817929939, Peak GPU memory MB: 9378.375 +2025-08-20 22:42:35 - INFO - [10ca2e39-2518-4d95-a25c-3614c6d24b60] Inference time: 24.05 seconds, CPU usage: 77.5%, CPU core utilization: [77.5, 67.6, 77.0, 88.0] +2025-08-20 22:42:35 - INFO - [10ca2e39-2518-4d95-a25c-3614c6d24b60] Cleaned up temporary file: temp_videos/10ca2e39-2518-4d95-a25c-3614c6d24b60.mp4 +2025-08-20 22:42:35 - INFO - [10ca2e39-2518-4d95-a25c-3614c6d24b60] Cleaned up temporary frame directory: temp_videos/10ca2e39-2518-4d95-a25c-3614c6d24b60 +2025-08-20 22:42:35 - INFO - [a2999e49-cb50-4242-866b-34d48bca2545] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_026.mp4' +2025-08-20 22:42:36 - INFO - [a2999e49-cb50-4242-866b-34d48bca2545] Video saved to temporary file: temp_videos/a2999e49-cb50-4242-866b-34d48bca2545.mp4 +2025-08-20 22:42:36 - INFO - [a2999e49-cb50-4242-866b-34d48bca2545] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:42:42 - INFO - [a2999e49-cb50-4242-866b-34d48bca2545] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:42:42 - INFO - [a2999e49-cb50-4242-866b-34d48bca2545] 30 frames saved to temp_videos/a2999e49-cb50-4242-866b-34d48bca2545 +2025-08-20 22:42:42 - INFO - Prompt token length: 3604 +2025-08-20 22:42:59 - INFO - Tokens per second: 41.29902120715977, Peak GPU memory MB: 9378.375 +2025-08-20 22:42:59 - INFO - [a2999e49-cb50-4242-866b-34d48bca2545] Inference time: 23.75 seconds, CPU usage: 80.4%, CPU core utilization: [79.6, 84.1, 81.0, 76.9] +2025-08-20 22:42:59 - INFO - [a2999e49-cb50-4242-866b-34d48bca2545] Cleaned up temporary file: temp_videos/a2999e49-cb50-4242-866b-34d48bca2545.mp4 +2025-08-20 22:42:59 - INFO - [a2999e49-cb50-4242-866b-34d48bca2545] Cleaned up temporary frame directory: temp_videos/a2999e49-cb50-4242-866b-34d48bca2545 +2025-08-20 22:42:59 - INFO - [811a63a5-1887-490b-88eb-b873d0d3cf87] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_027.mp4' +2025-08-20 22:43:00 - INFO - [811a63a5-1887-490b-88eb-b873d0d3cf87] Video saved to temporary file: temp_videos/811a63a5-1887-490b-88eb-b873d0d3cf87.mp4 +2025-08-20 22:43:00 - INFO - [811a63a5-1887-490b-88eb-b873d0d3cf87] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:43:07 - INFO - [811a63a5-1887-490b-88eb-b873d0d3cf87] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:43:07 - INFO - [811a63a5-1887-490b-88eb-b873d0d3cf87] 30 frames saved to temp_videos/811a63a5-1887-490b-88eb-b873d0d3cf87 +2025-08-20 22:43:08 - INFO - Prompt token length: 3604 +2025-08-20 22:43:26 - INFO - Tokens per second: 38.078800277017294, Peak GPU memory MB: 9378.375 +2025-08-20 22:43:26 - INFO - [811a63a5-1887-490b-88eb-b873d0d3cf87] Inference time: 26.18 seconds, CPU usage: 79.9%, CPU core utilization: [81.3, 65.3, 76.5, 96.5] +2025-08-20 22:43:26 - INFO - [811a63a5-1887-490b-88eb-b873d0d3cf87] Cleaned up temporary file: temp_videos/811a63a5-1887-490b-88eb-b873d0d3cf87.mp4 +2025-08-20 22:43:26 - INFO - [811a63a5-1887-490b-88eb-b873d0d3cf87] Cleaned up temporary frame directory: temp_videos/811a63a5-1887-490b-88eb-b873d0d3cf87 +2025-08-20 22:43:26 - INFO - [72fb8435-fb20-47b1-a99b-ac9e2d5822e8] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_027.mp4' +2025-08-20 22:43:26 - INFO - [72fb8435-fb20-47b1-a99b-ac9e2d5822e8] Video saved to temporary file: temp_videos/72fb8435-fb20-47b1-a99b-ac9e2d5822e8.mp4 +2025-08-20 22:43:26 - INFO - [72fb8435-fb20-47b1-a99b-ac9e2d5822e8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:43:32 - INFO - [72fb8435-fb20-47b1-a99b-ac9e2d5822e8] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:43:32 - INFO - [72fb8435-fb20-47b1-a99b-ac9e2d5822e8] 30 frames saved to temp_videos/72fb8435-fb20-47b1-a99b-ac9e2d5822e8 +2025-08-20 22:43:33 - INFO - Prompt token length: 3604 +2025-08-20 22:43:50 - INFO - Tokens per second: 41.296913159264854, Peak GPU memory MB: 9378.375 +2025-08-20 22:43:50 - INFO - [72fb8435-fb20-47b1-a99b-ac9e2d5822e8] Inference time: 24.63 seconds, CPU usage: 79.0%, CPU core utilization: [87.2, 77.0, 70.0, 81.6] +2025-08-20 22:43:50 - INFO - [72fb8435-fb20-47b1-a99b-ac9e2d5822e8] Cleaned up temporary file: temp_videos/72fb8435-fb20-47b1-a99b-ac9e2d5822e8.mp4 +2025-08-20 22:43:50 - INFO - [72fb8435-fb20-47b1-a99b-ac9e2d5822e8] Cleaned up temporary frame directory: temp_videos/72fb8435-fb20-47b1-a99b-ac9e2d5822e8 +2025-08-20 22:43:51 - INFO - [a1923e99-8ead-4886-ae7f-4e0074d6e2d4] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_028.mp4' +2025-08-20 22:43:51 - INFO - [a1923e99-8ead-4886-ae7f-4e0074d6e2d4] Video saved to temporary file: temp_videos/a1923e99-8ead-4886-ae7f-4e0074d6e2d4.mp4 +2025-08-20 22:43:51 - INFO - [a1923e99-8ead-4886-ae7f-4e0074d6e2d4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:43:58 - INFO - [a1923e99-8ead-4886-ae7f-4e0074d6e2d4] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:43:58 - INFO - [a1923e99-8ead-4886-ae7f-4e0074d6e2d4] 30 frames saved to temp_videos/a1923e99-8ead-4886-ae7f-4e0074d6e2d4 +2025-08-20 22:43:59 - INFO - Prompt token length: 3604 +2025-08-20 22:44:20 - INFO - Tokens per second: 38.84482978776854, Peak GPU memory MB: 9378.375 +2025-08-20 22:44:20 - INFO - [a1923e99-8ead-4886-ae7f-4e0074d6e2d4] Inference time: 28.93 seconds, CPU usage: 77.1%, CPU core utilization: [69.0, 81.6, 87.6, 70.1] +2025-08-20 22:44:20 - INFO - [a1923e99-8ead-4886-ae7f-4e0074d6e2d4] Cleaned up temporary file: temp_videos/a1923e99-8ead-4886-ae7f-4e0074d6e2d4.mp4 +2025-08-20 22:44:20 - INFO - [a1923e99-8ead-4886-ae7f-4e0074d6e2d4] Cleaned up temporary frame directory: temp_videos/a1923e99-8ead-4886-ae7f-4e0074d6e2d4 +2025-08-20 22:44:20 - INFO - [f37e0636-bba0-44e3-923e-6feea37c53a9] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_028.mp4' +2025-08-20 22:44:20 - INFO - [f37e0636-bba0-44e3-923e-6feea37c53a9] Video saved to temporary file: temp_videos/f37e0636-bba0-44e3-923e-6feea37c53a9.mp4 +2025-08-20 22:44:20 - INFO - [f37e0636-bba0-44e3-923e-6feea37c53a9] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:44:28 - INFO - [f37e0636-bba0-44e3-923e-6feea37c53a9] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:44:28 - INFO - [f37e0636-bba0-44e3-923e-6feea37c53a9] 30 frames saved to temp_videos/f37e0636-bba0-44e3-923e-6feea37c53a9 +2025-08-20 22:44:28 - INFO - Prompt token length: 3604 +2025-08-20 22:44:49 - INFO - Tokens per second: 39.910911105816794, Peak GPU memory MB: 9378.375 +2025-08-20 22:44:49 - INFO - [f37e0636-bba0-44e3-923e-6feea37c53a9] Inference time: 28.97 seconds, CPU usage: 75.8%, CPU core utilization: [74.4, 74.7, 76.0, 77.9] +2025-08-20 22:44:49 - INFO - [f37e0636-bba0-44e3-923e-6feea37c53a9] Cleaned up temporary file: temp_videos/f37e0636-bba0-44e3-923e-6feea37c53a9.mp4 +2025-08-20 22:44:49 - INFO - [f37e0636-bba0-44e3-923e-6feea37c53a9] Cleaned up temporary frame directory: temp_videos/f37e0636-bba0-44e3-923e-6feea37c53a9 +2025-08-20 22:44:49 - INFO - [d9d5eda4-26ed-419e-8aa2-9697fba9dd31] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_029.mp4' +2025-08-20 22:44:49 - INFO - [d9d5eda4-26ed-419e-8aa2-9697fba9dd31] Video saved to temporary file: temp_videos/d9d5eda4-26ed-419e-8aa2-9697fba9dd31.mp4 +2025-08-20 22:44:49 - INFO - [d9d5eda4-26ed-419e-8aa2-9697fba9dd31] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:44:58 - INFO - [d9d5eda4-26ed-419e-8aa2-9697fba9dd31] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:44:58 - INFO - [d9d5eda4-26ed-419e-8aa2-9697fba9dd31] 30 frames saved to temp_videos/d9d5eda4-26ed-419e-8aa2-9697fba9dd31 +2025-08-20 22:44:59 - INFO - Prompt token length: 3604 +2025-08-20 22:45:18 - INFO - Tokens per second: 39.2671771929051, Peak GPU memory MB: 9378.375 +2025-08-20 22:45:18 - INFO - [d9d5eda4-26ed-419e-8aa2-9697fba9dd31] Inference time: 29.12 seconds, CPU usage: 75.9%, CPU core utilization: [73.0, 62.7, 76.9, 90.8] +2025-08-20 22:45:18 - INFO - [d9d5eda4-26ed-419e-8aa2-9697fba9dd31] Cleaned up temporary file: temp_videos/d9d5eda4-26ed-419e-8aa2-9697fba9dd31.mp4 +2025-08-20 22:45:18 - INFO - [d9d5eda4-26ed-419e-8aa2-9697fba9dd31] Cleaned up temporary frame directory: temp_videos/d9d5eda4-26ed-419e-8aa2-9697fba9dd31 +2025-08-20 22:45:18 - INFO - [0156c3c5-ba34-4a33-a89a-de08f834c444] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_029.mp4' +2025-08-20 22:45:18 - INFO - [0156c3c5-ba34-4a33-a89a-de08f834c444] Video saved to temporary file: temp_videos/0156c3c5-ba34-4a33-a89a-de08f834c444.mp4 +2025-08-20 22:45:18 - INFO - [0156c3c5-ba34-4a33-a89a-de08f834c444] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:45:25 - INFO - [0156c3c5-ba34-4a33-a89a-de08f834c444] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:45:25 - INFO - [0156c3c5-ba34-4a33-a89a-de08f834c444] 30 frames saved to temp_videos/0156c3c5-ba34-4a33-a89a-de08f834c444 +2025-08-20 22:45:26 - INFO - Prompt token length: 3604 +2025-08-20 22:45:45 - INFO - Tokens per second: 38.770346069233774, Peak GPU memory MB: 9378.375 +2025-08-20 22:45:45 - INFO - [0156c3c5-ba34-4a33-a89a-de08f834c444] Inference time: 26.91 seconds, CPU usage: 77.2%, CPU core utilization: [77.0, 73.5, 76.6, 81.9] +2025-08-20 22:45:45 - INFO - [0156c3c5-ba34-4a33-a89a-de08f834c444] Cleaned up temporary file: temp_videos/0156c3c5-ba34-4a33-a89a-de08f834c444.mp4 +2025-08-20 22:45:45 - INFO - [0156c3c5-ba34-4a33-a89a-de08f834c444] Cleaned up temporary frame directory: temp_videos/0156c3c5-ba34-4a33-a89a-de08f834c444 +2025-08-20 22:45:46 - INFO - [f29f39ce-5322-4289-8a8e-dd1d548e0c1e] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_030.mp4' +2025-08-20 22:45:46 - INFO - [f29f39ce-5322-4289-8a8e-dd1d548e0c1e] Video saved to temporary file: temp_videos/f29f39ce-5322-4289-8a8e-dd1d548e0c1e.mp4 +2025-08-20 22:45:46 - INFO - [f29f39ce-5322-4289-8a8e-dd1d548e0c1e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:45:52 - INFO - [f29f39ce-5322-4289-8a8e-dd1d548e0c1e] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:45:52 - INFO - [f29f39ce-5322-4289-8a8e-dd1d548e0c1e] 30 frames saved to temp_videos/f29f39ce-5322-4289-8a8e-dd1d548e0c1e +2025-08-20 22:45:52 - INFO - Prompt token length: 3604 +2025-08-20 22:46:10 - INFO - Tokens per second: 37.574975679061765, Peak GPU memory MB: 9378.375 +2025-08-20 22:46:10 - INFO - [f29f39ce-5322-4289-8a8e-dd1d548e0c1e] Inference time: 24.52 seconds, CPU usage: 76.4%, CPU core utilization: [73.2, 76.8, 78.6, 76.9] +2025-08-20 22:46:10 - INFO - [f29f39ce-5322-4289-8a8e-dd1d548e0c1e] Cleaned up temporary file: temp_videos/f29f39ce-5322-4289-8a8e-dd1d548e0c1e.mp4 +2025-08-20 22:46:10 - INFO - [f29f39ce-5322-4289-8a8e-dd1d548e0c1e] Cleaned up temporary frame directory: temp_videos/f29f39ce-5322-4289-8a8e-dd1d548e0c1e +2025-08-20 22:46:10 - INFO - [701e416c-0359-4f4c-ba05-8b8f7ec8434b] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_030.mp4' +2025-08-20 22:46:10 - INFO - [701e416c-0359-4f4c-ba05-8b8f7ec8434b] Video saved to temporary file: temp_videos/701e416c-0359-4f4c-ba05-8b8f7ec8434b.mp4 +2025-08-20 22:46:10 - INFO - [701e416c-0359-4f4c-ba05-8b8f7ec8434b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:46:17 - INFO - [701e416c-0359-4f4c-ba05-8b8f7ec8434b] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:46:17 - INFO - [701e416c-0359-4f4c-ba05-8b8f7ec8434b] 30 frames saved to temp_videos/701e416c-0359-4f4c-ba05-8b8f7ec8434b +2025-08-20 22:46:18 - INFO - Prompt token length: 3604 +2025-08-20 22:46:35 - INFO - Tokens per second: 38.867499480898886, Peak GPU memory MB: 9378.375 +2025-08-20 22:46:35 - INFO - [701e416c-0359-4f4c-ba05-8b8f7ec8434b] Inference time: 24.94 seconds, CPU usage: 82.0%, CPU core utilization: [85.2, 68.3, 78.1, 96.2] +2025-08-20 22:46:35 - INFO - [701e416c-0359-4f4c-ba05-8b8f7ec8434b] Cleaned up temporary file: temp_videos/701e416c-0359-4f4c-ba05-8b8f7ec8434b.mp4 +2025-08-20 22:46:35 - INFO - [701e416c-0359-4f4c-ba05-8b8f7ec8434b] Cleaned up temporary frame directory: temp_videos/701e416c-0359-4f4c-ba05-8b8f7ec8434b +2025-08-20 22:46:35 - INFO - [7d13050f-0acf-4140-b270-4f3a01ea84ad] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_031.mp4' +2025-08-20 22:46:36 - INFO - [7d13050f-0acf-4140-b270-4f3a01ea84ad] Video saved to temporary file: temp_videos/7d13050f-0acf-4140-b270-4f3a01ea84ad.mp4 +2025-08-20 22:46:36 - INFO - [7d13050f-0acf-4140-b270-4f3a01ea84ad] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:46:42 - INFO - [7d13050f-0acf-4140-b270-4f3a01ea84ad] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:46:42 - INFO - [7d13050f-0acf-4140-b270-4f3a01ea84ad] 30 frames saved to temp_videos/7d13050f-0acf-4140-b270-4f3a01ea84ad +2025-08-20 22:46:43 - INFO - Prompt token length: 3604 +2025-08-20 22:47:02 - INFO - Tokens per second: 42.473264827377115, Peak GPU memory MB: 9378.375 +2025-08-20 22:47:02 - INFO - [7d13050f-0acf-4140-b270-4f3a01ea84ad] Inference time: 26.92 seconds, CPU usage: 62.5%, CPU core utilization: [60.4, 57.0, 51.6, 81.0] +2025-08-20 22:47:02 - INFO - [7d13050f-0acf-4140-b270-4f3a01ea84ad] Cleaned up temporary file: temp_videos/7d13050f-0acf-4140-b270-4f3a01ea84ad.mp4 +2025-08-20 22:47:02 - INFO - [7d13050f-0acf-4140-b270-4f3a01ea84ad] Cleaned up temporary frame directory: temp_videos/7d13050f-0acf-4140-b270-4f3a01ea84ad +2025-08-20 22:47:03 - INFO - [81e5e9db-81e6-4b5c-aae9-e808434e01c0] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_031.mp4' +2025-08-20 22:47:03 - INFO - [81e5e9db-81e6-4b5c-aae9-e808434e01c0] Video saved to temporary file: temp_videos/81e5e9db-81e6-4b5c-aae9-e808434e01c0.mp4 +2025-08-20 22:47:03 - INFO - [81e5e9db-81e6-4b5c-aae9-e808434e01c0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:47:07 - INFO - [81e5e9db-81e6-4b5c-aae9-e808434e01c0] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:47:07 - INFO - [81e5e9db-81e6-4b5c-aae9-e808434e01c0] 30 frames saved to temp_videos/81e5e9db-81e6-4b5c-aae9-e808434e01c0 +2025-08-20 22:47:08 - INFO - Prompt token length: 3604 +2025-08-20 22:47:27 - INFO - Tokens per second: 42.60848705585198, Peak GPU memory MB: 9378.375 +2025-08-20 22:47:27 - INFO - [81e5e9db-81e6-4b5c-aae9-e808434e01c0] Inference time: 24.63 seconds, CPU usage: 38.9%, CPU core utilization: [21.8, 63.8, 20.6, 49.7] +2025-08-20 22:47:27 - INFO - [81e5e9db-81e6-4b5c-aae9-e808434e01c0] Cleaned up temporary file: temp_videos/81e5e9db-81e6-4b5c-aae9-e808434e01c0.mp4 +2025-08-20 22:47:27 - INFO - [81e5e9db-81e6-4b5c-aae9-e808434e01c0] Cleaned up temporary frame directory: temp_videos/81e5e9db-81e6-4b5c-aae9-e808434e01c0 +2025-08-20 22:47:27 - INFO - [26b2aa09-2296-48d2-9ccf-71573c8c8f65] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_032.mp4' +2025-08-20 22:47:27 - INFO - [26b2aa09-2296-48d2-9ccf-71573c8c8f65] Video saved to temporary file: temp_videos/26b2aa09-2296-48d2-9ccf-71573c8c8f65.mp4 +2025-08-20 22:47:27 - INFO - [26b2aa09-2296-48d2-9ccf-71573c8c8f65] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:47:32 - INFO - [26b2aa09-2296-48d2-9ccf-71573c8c8f65] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:47:32 - INFO - [26b2aa09-2296-48d2-9ccf-71573c8c8f65] 30 frames saved to temp_videos/26b2aa09-2296-48d2-9ccf-71573c8c8f65 +2025-08-20 22:47:33 - INFO - Prompt token length: 3604 +2025-08-20 22:47:53 - INFO - Tokens per second: 40.80785392093638, Peak GPU memory MB: 9378.375 +2025-08-20 22:47:53 - INFO - [26b2aa09-2296-48d2-9ccf-71573c8c8f65] Inference time: 25.82 seconds, CPU usage: 44.1%, CPU core utilization: [51.5, 31.5, 66.0, 27.2] +2025-08-20 22:47:53 - INFO - [26b2aa09-2296-48d2-9ccf-71573c8c8f65] Cleaned up temporary file: temp_videos/26b2aa09-2296-48d2-9ccf-71573c8c8f65.mp4 +2025-08-20 22:47:53 - INFO - [26b2aa09-2296-48d2-9ccf-71573c8c8f65] Cleaned up temporary frame directory: temp_videos/26b2aa09-2296-48d2-9ccf-71573c8c8f65 +2025-08-20 22:47:53 - INFO - [d521c0c1-df0f-4a77-98fe-904531631498] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_032.mp4' +2025-08-20 22:47:53 - INFO - [d521c0c1-df0f-4a77-98fe-904531631498] Video saved to temporary file: temp_videos/d521c0c1-df0f-4a77-98fe-904531631498.mp4 +2025-08-20 22:47:53 - INFO - [d521c0c1-df0f-4a77-98fe-904531631498] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:47:58 - INFO - [d521c0c1-df0f-4a77-98fe-904531631498] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:47:58 - INFO - [d521c0c1-df0f-4a77-98fe-904531631498] 30 frames saved to temp_videos/d521c0c1-df0f-4a77-98fe-904531631498 +2025-08-20 22:47:59 - INFO - Prompt token length: 3604 +2025-08-20 22:48:19 - INFO - Tokens per second: 41.68167505655786, Peak GPU memory MB: 9378.375 +2025-08-20 22:48:19 - INFO - [d521c0c1-df0f-4a77-98fe-904531631498] Inference time: 25.70 seconds, CPU usage: 55.5%, CPU core utilization: [41.3, 65.4, 56.7, 58.6] +2025-08-20 22:48:19 - INFO - [d521c0c1-df0f-4a77-98fe-904531631498] Cleaned up temporary file: temp_videos/d521c0c1-df0f-4a77-98fe-904531631498.mp4 +2025-08-20 22:48:19 - INFO - [d521c0c1-df0f-4a77-98fe-904531631498] Cleaned up temporary frame directory: temp_videos/d521c0c1-df0f-4a77-98fe-904531631498 +2025-08-20 22:48:19 - INFO - [34958ac9-b7d7-4bbf-820a-41b304ebf7e5] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_033.mp4' +2025-08-20 22:48:19 - INFO - [34958ac9-b7d7-4bbf-820a-41b304ebf7e5] Video saved to temporary file: temp_videos/34958ac9-b7d7-4bbf-820a-41b304ebf7e5.mp4 +2025-08-20 22:48:19 - INFO - [34958ac9-b7d7-4bbf-820a-41b304ebf7e5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:48:24 - INFO - [34958ac9-b7d7-4bbf-820a-41b304ebf7e5] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:48:24 - INFO - [34958ac9-b7d7-4bbf-820a-41b304ebf7e5] 30 frames saved to temp_videos/34958ac9-b7d7-4bbf-820a-41b304ebf7e5 +2025-08-20 22:48:24 - INFO - Prompt token length: 3604 +2025-08-20 22:48:41 - INFO - Tokens per second: 42.93652782490678, Peak GPU memory MB: 9378.375 +2025-08-20 22:48:41 - INFO - [34958ac9-b7d7-4bbf-820a-41b304ebf7e5] Inference time: 21.99 seconds, CPU usage: 39.5%, CPU core utilization: [21.5, 55.4, 19.6, 61.4] +2025-08-20 22:48:41 - INFO - [34958ac9-b7d7-4bbf-820a-41b304ebf7e5] Cleaned up temporary file: temp_videos/34958ac9-b7d7-4bbf-820a-41b304ebf7e5.mp4 +2025-08-20 22:48:41 - INFO - [34958ac9-b7d7-4bbf-820a-41b304ebf7e5] Cleaned up temporary frame directory: temp_videos/34958ac9-b7d7-4bbf-820a-41b304ebf7e5 +2025-08-20 22:48:41 - INFO - [bb49e37e-2d52-4e0e-819d-740e1fd7da79] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_033.mp4' +2025-08-20 22:48:41 - INFO - [bb49e37e-2d52-4e0e-819d-740e1fd7da79] Video saved to temporary file: temp_videos/bb49e37e-2d52-4e0e-819d-740e1fd7da79.mp4 +2025-08-20 22:48:41 - INFO - [bb49e37e-2d52-4e0e-819d-740e1fd7da79] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:48:46 - INFO - [bb49e37e-2d52-4e0e-819d-740e1fd7da79] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:48:46 - INFO - [bb49e37e-2d52-4e0e-819d-740e1fd7da79] 30 frames saved to temp_videos/bb49e37e-2d52-4e0e-819d-740e1fd7da79 +2025-08-20 22:48:47 - INFO - Prompt token length: 3604 +2025-08-20 22:49:03 - INFO - Tokens per second: 43.18803826536313, Peak GPU memory MB: 9378.375 +2025-08-20 22:49:03 - INFO - [bb49e37e-2d52-4e0e-819d-740e1fd7da79] Inference time: 22.06 seconds, CPU usage: 39.8%, CPU core utilization: [22.0, 21.8, 95.1, 20.1] +2025-08-20 22:49:03 - INFO - [bb49e37e-2d52-4e0e-819d-740e1fd7da79] Cleaned up temporary file: temp_videos/bb49e37e-2d52-4e0e-819d-740e1fd7da79.mp4 +2025-08-20 22:49:03 - INFO - [bb49e37e-2d52-4e0e-819d-740e1fd7da79] Cleaned up temporary frame directory: temp_videos/bb49e37e-2d52-4e0e-819d-740e1fd7da79 +2025-08-20 22:49:04 - INFO - [8944c9b5-003e-4f7f-b955-f0d3a1242c6c] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_034.mp4' +2025-08-20 22:49:04 - INFO - [8944c9b5-003e-4f7f-b955-f0d3a1242c6c] Video saved to temporary file: temp_videos/8944c9b5-003e-4f7f-b955-f0d3a1242c6c.mp4 +2025-08-20 22:49:04 - INFO - [8944c9b5-003e-4f7f-b955-f0d3a1242c6c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:49:09 - INFO - [8944c9b5-003e-4f7f-b955-f0d3a1242c6c] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:49:09 - INFO - [8944c9b5-003e-4f7f-b955-f0d3a1242c6c] 30 frames saved to temp_videos/8944c9b5-003e-4f7f-b955-f0d3a1242c6c +2025-08-20 22:49:09 - INFO - Prompt token length: 3604 +2025-08-20 22:49:28 - INFO - Tokens per second: 42.55636672195234, Peak GPU memory MB: 9378.375 +2025-08-20 22:49:28 - INFO - [8944c9b5-003e-4f7f-b955-f0d3a1242c6c] Inference time: 24.50 seconds, CPU usage: 39.2%, CPU core utilization: [26.4, 54.9, 21.9, 53.6] +2025-08-20 22:49:28 - INFO - [8944c9b5-003e-4f7f-b955-f0d3a1242c6c] Cleaned up temporary file: temp_videos/8944c9b5-003e-4f7f-b955-f0d3a1242c6c.mp4 +2025-08-20 22:49:28 - INFO - [8944c9b5-003e-4f7f-b955-f0d3a1242c6c] Cleaned up temporary frame directory: temp_videos/8944c9b5-003e-4f7f-b955-f0d3a1242c6c +2025-08-20 22:49:28 - INFO - [2fc4f4a7-985e-4444-b521-6f05e6ddd98b] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_034.mp4' +2025-08-20 22:49:28 - INFO - [2fc4f4a7-985e-4444-b521-6f05e6ddd98b] Video saved to temporary file: temp_videos/2fc4f4a7-985e-4444-b521-6f05e6ddd98b.mp4 +2025-08-20 22:49:28 - INFO - [2fc4f4a7-985e-4444-b521-6f05e6ddd98b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:49:33 - INFO - [2fc4f4a7-985e-4444-b521-6f05e6ddd98b] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:49:33 - INFO - [2fc4f4a7-985e-4444-b521-6f05e6ddd98b] 30 frames saved to temp_videos/2fc4f4a7-985e-4444-b521-6f05e6ddd98b +2025-08-20 22:49:34 - INFO - Prompt token length: 3604 +2025-08-20 22:49:53 - INFO - Tokens per second: 42.77388431601942, Peak GPU memory MB: 9378.375 +2025-08-20 22:49:53 - INFO - [2fc4f4a7-985e-4444-b521-6f05e6ddd98b] Inference time: 24.49 seconds, CPU usage: 50.7%, CPU core utilization: [48.9, 68.1, 50.9, 35.1] +2025-08-20 22:49:53 - INFO - [2fc4f4a7-985e-4444-b521-6f05e6ddd98b] Cleaned up temporary file: temp_videos/2fc4f4a7-985e-4444-b521-6f05e6ddd98b.mp4 +2025-08-20 22:49:53 - INFO - [2fc4f4a7-985e-4444-b521-6f05e6ddd98b] Cleaned up temporary frame directory: temp_videos/2fc4f4a7-985e-4444-b521-6f05e6ddd98b +2025-08-20 22:49:53 - INFO - [3b0fde39-fa83-45a9-9100-4255f9ed00b4] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_035.mp4' +2025-08-20 22:49:53 - INFO - [3b0fde39-fa83-45a9-9100-4255f9ed00b4] Video saved to temporary file: temp_videos/3b0fde39-fa83-45a9-9100-4255f9ed00b4.mp4 +2025-08-20 22:49:53 - INFO - [3b0fde39-fa83-45a9-9100-4255f9ed00b4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:49:58 - INFO - [3b0fde39-fa83-45a9-9100-4255f9ed00b4] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:49:58 - INFO - [3b0fde39-fa83-45a9-9100-4255f9ed00b4] 30 frames saved to temp_videos/3b0fde39-fa83-45a9-9100-4255f9ed00b4 +2025-08-20 22:49:58 - INFO - Prompt token length: 3604 +2025-08-20 22:50:19 - INFO - Tokens per second: 42.49865841788772, Peak GPU memory MB: 9378.375 +2025-08-20 22:50:19 - INFO - [3b0fde39-fa83-45a9-9100-4255f9ed00b4] Inference time: 25.52 seconds, CPU usage: 38.1%, CPU core utilization: [19.0, 77.5, 19.1, 36.8] +2025-08-20 22:50:19 - INFO - [3b0fde39-fa83-45a9-9100-4255f9ed00b4] Cleaned up temporary file: temp_videos/3b0fde39-fa83-45a9-9100-4255f9ed00b4.mp4 +2025-08-20 22:50:19 - INFO - [3b0fde39-fa83-45a9-9100-4255f9ed00b4] Cleaned up temporary frame directory: temp_videos/3b0fde39-fa83-45a9-9100-4255f9ed00b4 +2025-08-20 22:50:19 - INFO - [d3e3c964-fc52-45f6-83e8-6208ba813345] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_035.mp4' +2025-08-20 22:50:19 - INFO - [d3e3c964-fc52-45f6-83e8-6208ba813345] Video saved to temporary file: temp_videos/d3e3c964-fc52-45f6-83e8-6208ba813345.mp4 +2025-08-20 22:50:19 - INFO - [d3e3c964-fc52-45f6-83e8-6208ba813345] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:50:24 - INFO - [d3e3c964-fc52-45f6-83e8-6208ba813345] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:50:24 - INFO - [d3e3c964-fc52-45f6-83e8-6208ba813345] 30 frames saved to temp_videos/d3e3c964-fc52-45f6-83e8-6208ba813345 +2025-08-20 22:50:24 - INFO - Prompt token length: 3604 +2025-08-20 22:50:44 - INFO - Tokens per second: 42.56102609895685, Peak GPU memory MB: 9378.375 +2025-08-20 22:50:44 - INFO - [d3e3c964-fc52-45f6-83e8-6208ba813345] Inference time: 25.44 seconds, CPU usage: 38.0%, CPU core utilization: [56.1, 44.5, 33.2, 18.2] +2025-08-20 22:50:44 - INFO - [d3e3c964-fc52-45f6-83e8-6208ba813345] Cleaned up temporary file: temp_videos/d3e3c964-fc52-45f6-83e8-6208ba813345.mp4 +2025-08-20 22:50:44 - INFO - [d3e3c964-fc52-45f6-83e8-6208ba813345] Cleaned up temporary frame directory: temp_videos/d3e3c964-fc52-45f6-83e8-6208ba813345 +2025-08-20 22:50:44 - INFO - [05601db3-ba4c-405e-bbc0-982185b86516] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_036.mp4' +2025-08-20 22:50:44 - INFO - [05601db3-ba4c-405e-bbc0-982185b86516] Video saved to temporary file: temp_videos/05601db3-ba4c-405e-bbc0-982185b86516.mp4 +2025-08-20 22:50:44 - INFO - [05601db3-ba4c-405e-bbc0-982185b86516] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:50:49 - INFO - [05601db3-ba4c-405e-bbc0-982185b86516] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:50:49 - INFO - [05601db3-ba4c-405e-bbc0-982185b86516] 30 frames saved to temp_videos/05601db3-ba4c-405e-bbc0-982185b86516 +2025-08-20 22:50:50 - INFO - Prompt token length: 3604 +2025-08-20 22:51:07 - INFO - Tokens per second: 42.71259057251842, Peak GPU memory MB: 9378.375 +2025-08-20 22:51:07 - INFO - [05601db3-ba4c-405e-bbc0-982185b86516] Inference time: 22.76 seconds, CPU usage: 39.3%, CPU core utilization: [31.6, 20.0, 76.4, 28.9] +2025-08-20 22:51:07 - INFO - [05601db3-ba4c-405e-bbc0-982185b86516] Cleaned up temporary file: temp_videos/05601db3-ba4c-405e-bbc0-982185b86516.mp4 +2025-08-20 22:51:07 - INFO - [05601db3-ba4c-405e-bbc0-982185b86516] Cleaned up temporary frame directory: temp_videos/05601db3-ba4c-405e-bbc0-982185b86516 +2025-08-20 22:51:07 - INFO - [d3e8e022-c4df-4844-a3c6-6f0838a0a72d] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_036.mp4' +2025-08-20 22:51:07 - INFO - [d3e8e022-c4df-4844-a3c6-6f0838a0a72d] Video saved to temporary file: temp_videos/d3e8e022-c4df-4844-a3c6-6f0838a0a72d.mp4 +2025-08-20 22:51:07 - INFO - [d3e8e022-c4df-4844-a3c6-6f0838a0a72d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:51:12 - INFO - [d3e8e022-c4df-4844-a3c6-6f0838a0a72d] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:51:12 - INFO - [d3e8e022-c4df-4844-a3c6-6f0838a0a72d] 30 frames saved to temp_videos/d3e8e022-c4df-4844-a3c6-6f0838a0a72d +2025-08-20 22:51:13 - INFO - Prompt token length: 3604 +2025-08-20 22:51:30 - INFO - Tokens per second: 43.1751414519081, Peak GPU memory MB: 9378.375 +2025-08-20 22:51:30 - INFO - [d3e8e022-c4df-4844-a3c6-6f0838a0a72d] Inference time: 22.83 seconds, CPU usage: 39.9%, CPU core utilization: [38.8, 21.6, 78.3, 20.7] +2025-08-20 22:51:30 - INFO - [d3e8e022-c4df-4844-a3c6-6f0838a0a72d] Cleaned up temporary file: temp_videos/d3e8e022-c4df-4844-a3c6-6f0838a0a72d.mp4 +2025-08-20 22:51:30 - INFO - [d3e8e022-c4df-4844-a3c6-6f0838a0a72d] Cleaned up temporary frame directory: temp_videos/d3e8e022-c4df-4844-a3c6-6f0838a0a72d +2025-08-20 22:51:30 - INFO - [67cebe22-e989-4767-a814-edf257b2eb99] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_037.mp4' +2025-08-20 22:51:30 - INFO - [67cebe22-e989-4767-a814-edf257b2eb99] Video saved to temporary file: temp_videos/67cebe22-e989-4767-a814-edf257b2eb99.mp4 +2025-08-20 22:51:30 - INFO - [67cebe22-e989-4767-a814-edf257b2eb99] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:51:35 - INFO - [67cebe22-e989-4767-a814-edf257b2eb99] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:51:35 - INFO - [67cebe22-e989-4767-a814-edf257b2eb99] 30 frames saved to temp_videos/67cebe22-e989-4767-a814-edf257b2eb99 +2025-08-20 22:51:36 - INFO - Prompt token length: 3604 +2025-08-20 22:51:54 - INFO - Tokens per second: 42.99028640429355, Peak GPU memory MB: 9378.375 +2025-08-20 22:51:54 - INFO - [67cebe22-e989-4767-a814-edf257b2eb99] Inference time: 23.24 seconds, CPU usage: 39.1%, CPU core utilization: [53.7, 27.7, 52.4, 22.5] +2025-08-20 22:51:54 - INFO - [67cebe22-e989-4767-a814-edf257b2eb99] Cleaned up temporary file: temp_videos/67cebe22-e989-4767-a814-edf257b2eb99.mp4 +2025-08-20 22:51:54 - INFO - [67cebe22-e989-4767-a814-edf257b2eb99] Cleaned up temporary frame directory: temp_videos/67cebe22-e989-4767-a814-edf257b2eb99 +2025-08-20 22:51:54 - INFO - [c3876a59-c2a5-4f8a-ae78-8f2b40b834b5] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_037.mp4' +2025-08-20 22:51:54 - INFO - [c3876a59-c2a5-4f8a-ae78-8f2b40b834b5] Video saved to temporary file: temp_videos/c3876a59-c2a5-4f8a-ae78-8f2b40b834b5.mp4 +2025-08-20 22:51:54 - INFO - [c3876a59-c2a5-4f8a-ae78-8f2b40b834b5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:51:59 - INFO - [c3876a59-c2a5-4f8a-ae78-8f2b40b834b5] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:51:59 - INFO - [c3876a59-c2a5-4f8a-ae78-8f2b40b834b5] 30 frames saved to temp_videos/c3876a59-c2a5-4f8a-ae78-8f2b40b834b5 +2025-08-20 22:51:59 - INFO - Prompt token length: 3604 +2025-08-20 22:52:17 - INFO - Tokens per second: 42.91101890756983, Peak GPU memory MB: 9378.375 +2025-08-20 22:52:17 - INFO - [c3876a59-c2a5-4f8a-ae78-8f2b40b834b5] Inference time: 23.26 seconds, CPU usage: 39.4%, CPU core utilization: [20.5, 55.9, 21.0, 60.1] +2025-08-20 22:52:17 - INFO - [c3876a59-c2a5-4f8a-ae78-8f2b40b834b5] Cleaned up temporary file: temp_videos/c3876a59-c2a5-4f8a-ae78-8f2b40b834b5.mp4 +2025-08-20 22:52:17 - INFO - [c3876a59-c2a5-4f8a-ae78-8f2b40b834b5] Cleaned up temporary frame directory: temp_videos/c3876a59-c2a5-4f8a-ae78-8f2b40b834b5 +2025-08-20 22:52:17 - INFO - [d07c5198-346d-4745-a0bb-2c05890908a9] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_038.mp4' +2025-08-20 22:52:17 - INFO - [d07c5198-346d-4745-a0bb-2c05890908a9] Video saved to temporary file: temp_videos/d07c5198-346d-4745-a0bb-2c05890908a9.mp4 +2025-08-20 22:52:17 - INFO - [d07c5198-346d-4745-a0bb-2c05890908a9] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:52:22 - INFO - [d07c5198-346d-4745-a0bb-2c05890908a9] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:52:22 - INFO - [d07c5198-346d-4745-a0bb-2c05890908a9] 30 frames saved to temp_videos/d07c5198-346d-4745-a0bb-2c05890908a9 +2025-08-20 22:52:22 - INFO - Prompt token length: 3604 +2025-08-20 22:52:41 - INFO - Tokens per second: 42.56079816289042, Peak GPU memory MB: 9378.375 +2025-08-20 22:52:41 - INFO - [d07c5198-346d-4745-a0bb-2c05890908a9] Inference time: 23.32 seconds, CPU usage: 38.9%, CPU core utilization: [40.0, 19.9, 63.4, 32.4] +2025-08-20 22:52:41 - INFO - [d07c5198-346d-4745-a0bb-2c05890908a9] Cleaned up temporary file: temp_videos/d07c5198-346d-4745-a0bb-2c05890908a9.mp4 +2025-08-20 22:52:41 - INFO - [d07c5198-346d-4745-a0bb-2c05890908a9] Cleaned up temporary frame directory: temp_videos/d07c5198-346d-4745-a0bb-2c05890908a9 +2025-08-20 22:52:41 - INFO - [2e1a282f-c384-44ad-acdd-7bc9b75084da] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_038.mp4' +2025-08-20 22:52:41 - INFO - [2e1a282f-c384-44ad-acdd-7bc9b75084da] Video saved to temporary file: temp_videos/2e1a282f-c384-44ad-acdd-7bc9b75084da.mp4 +2025-08-20 22:52:41 - INFO - [2e1a282f-c384-44ad-acdd-7bc9b75084da] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:52:46 - INFO - [2e1a282f-c384-44ad-acdd-7bc9b75084da] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:52:46 - INFO - [2e1a282f-c384-44ad-acdd-7bc9b75084da] 30 frames saved to temp_videos/2e1a282f-c384-44ad-acdd-7bc9b75084da +2025-08-20 22:52:46 - INFO - Prompt token length: 3604 +2025-08-20 22:53:04 - INFO - Tokens per second: 43.039363167119106, Peak GPU memory MB: 9378.375 +2025-08-20 22:53:04 - INFO - [2e1a282f-c384-44ad-acdd-7bc9b75084da] Inference time: 23.33 seconds, CPU usage: 39.0%, CPU core utilization: [32.3, 46.4, 47.3, 29.8] +2025-08-20 22:53:04 - INFO - [2e1a282f-c384-44ad-acdd-7bc9b75084da] Cleaned up temporary file: temp_videos/2e1a282f-c384-44ad-acdd-7bc9b75084da.mp4 +2025-08-20 22:53:04 - INFO - [2e1a282f-c384-44ad-acdd-7bc9b75084da] Cleaned up temporary frame directory: temp_videos/2e1a282f-c384-44ad-acdd-7bc9b75084da +2025-08-20 22:53:04 - INFO - [b8191900-0e19-491d-8a46-47212e0143fb] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_039.mp4' +2025-08-20 22:53:04 - INFO - [b8191900-0e19-491d-8a46-47212e0143fb] Video saved to temporary file: temp_videos/b8191900-0e19-491d-8a46-47212e0143fb.mp4 +2025-08-20 22:53:04 - INFO - [b8191900-0e19-491d-8a46-47212e0143fb] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:53:09 - INFO - [b8191900-0e19-491d-8a46-47212e0143fb] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:53:09 - INFO - [b8191900-0e19-491d-8a46-47212e0143fb] 30 frames saved to temp_videos/b8191900-0e19-491d-8a46-47212e0143fb +2025-08-20 22:53:09 - INFO - Prompt token length: 3604 +2025-08-20 22:53:29 - INFO - Tokens per second: 42.75833952241025, Peak GPU memory MB: 9378.375 +2025-08-20 22:53:29 - INFO - [b8191900-0e19-491d-8a46-47212e0143fb] Inference time: 24.55 seconds, CPU usage: 38.6%, CPU core utilization: [20.2, 25.7, 89.8, 18.7] +2025-08-20 22:53:29 - INFO - [b8191900-0e19-491d-8a46-47212e0143fb] Cleaned up temporary file: temp_videos/b8191900-0e19-491d-8a46-47212e0143fb.mp4 +2025-08-20 22:53:29 - INFO - [b8191900-0e19-491d-8a46-47212e0143fb] Cleaned up temporary frame directory: temp_videos/b8191900-0e19-491d-8a46-47212e0143fb +2025-08-20 22:53:29 - INFO - [910d3fc2-1d5c-40b3-856f-02012cfbe592] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_039.mp4' +2025-08-20 22:53:29 - INFO - [910d3fc2-1d5c-40b3-856f-02012cfbe592] Video saved to temporary file: temp_videos/910d3fc2-1d5c-40b3-856f-02012cfbe592.mp4 +2025-08-20 22:53:29 - INFO - [910d3fc2-1d5c-40b3-856f-02012cfbe592] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:53:34 - INFO - [910d3fc2-1d5c-40b3-856f-02012cfbe592] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:53:34 - INFO - [910d3fc2-1d5c-40b3-856f-02012cfbe592] 30 frames saved to temp_videos/910d3fc2-1d5c-40b3-856f-02012cfbe592 +2025-08-20 22:53:34 - INFO - Prompt token length: 3604 +2025-08-20 22:53:53 - INFO - Tokens per second: 43.06007293674169, Peak GPU memory MB: 9378.375 +2025-08-20 22:53:53 - INFO - [910d3fc2-1d5c-40b3-856f-02012cfbe592] Inference time: 24.49 seconds, CPU usage: 38.6%, CPU core utilization: [28.2, 20.0, 86.1, 20.0] +2025-08-20 22:53:54 - INFO - [910d3fc2-1d5c-40b3-856f-02012cfbe592] Cleaned up temporary file: temp_videos/910d3fc2-1d5c-40b3-856f-02012cfbe592.mp4 +2025-08-20 22:53:54 - INFO - [910d3fc2-1d5c-40b3-856f-02012cfbe592] Cleaned up temporary frame directory: temp_videos/910d3fc2-1d5c-40b3-856f-02012cfbe592 +2025-08-20 22:53:54 - INFO - [a552571d-2073-4688-84ba-f2e3fe36e9f5] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_040.mp4' +2025-08-20 22:53:54 - INFO - [a552571d-2073-4688-84ba-f2e3fe36e9f5] Video saved to temporary file: temp_videos/a552571d-2073-4688-84ba-f2e3fe36e9f5.mp4 +2025-08-20 22:53:54 - INFO - [a552571d-2073-4688-84ba-f2e3fe36e9f5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:53:59 - INFO - [a552571d-2073-4688-84ba-f2e3fe36e9f5] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:53:59 - INFO - [a552571d-2073-4688-84ba-f2e3fe36e9f5] 30 frames saved to temp_videos/a552571d-2073-4688-84ba-f2e3fe36e9f5 +2025-08-20 22:53:59 - INFO - Prompt token length: 3604 +2025-08-20 22:54:16 - INFO - Tokens per second: 43.14582941802569, Peak GPU memory MB: 9378.375 +2025-08-20 22:54:16 - INFO - [a552571d-2073-4688-84ba-f2e3fe36e9f5] Inference time: 22.64 seconds, CPU usage: 39.7%, CPU core utilization: [69.3, 21.9, 47.1, 20.1] +2025-08-20 22:54:16 - INFO - [a552571d-2073-4688-84ba-f2e3fe36e9f5] Cleaned up temporary file: temp_videos/a552571d-2073-4688-84ba-f2e3fe36e9f5.mp4 +2025-08-20 22:54:16 - INFO - [a552571d-2073-4688-84ba-f2e3fe36e9f5] Cleaned up temporary frame directory: temp_videos/a552571d-2073-4688-84ba-f2e3fe36e9f5 +2025-08-20 22:54:17 - INFO - [7a3804f8-f643-49b6-ac31-fdfe9917bb44] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_040.mp4' +2025-08-20 22:54:17 - INFO - [7a3804f8-f643-49b6-ac31-fdfe9917bb44] Video saved to temporary file: temp_videos/7a3804f8-f643-49b6-ac31-fdfe9917bb44.mp4 +2025-08-20 22:54:17 - INFO - [7a3804f8-f643-49b6-ac31-fdfe9917bb44] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:54:21 - INFO - [7a3804f8-f643-49b6-ac31-fdfe9917bb44] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:54:21 - INFO - [7a3804f8-f643-49b6-ac31-fdfe9917bb44] 30 frames saved to temp_videos/7a3804f8-f643-49b6-ac31-fdfe9917bb44 +2025-08-20 22:54:22 - INFO - Prompt token length: 3604 +2025-08-20 22:54:39 - INFO - Tokens per second: 42.90120292725611, Peak GPU memory MB: 9378.375 +2025-08-20 22:54:39 - INFO - [7a3804f8-f643-49b6-ac31-fdfe9917bb44] Inference time: 22.56 seconds, CPU usage: 39.4%, CPU core utilization: [23.0, 33.0, 19.8, 81.8] +2025-08-20 22:54:39 - INFO - [7a3804f8-f643-49b6-ac31-fdfe9917bb44] Cleaned up temporary file: temp_videos/7a3804f8-f643-49b6-ac31-fdfe9917bb44.mp4 +2025-08-20 22:54:39 - INFO - [7a3804f8-f643-49b6-ac31-fdfe9917bb44] Cleaned up temporary frame directory: temp_videos/7a3804f8-f643-49b6-ac31-fdfe9917bb44 +2025-08-20 22:54:39 - INFO - [29e1bbd2-43fc-421e-8ec6-e4426f910645] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_041.mp4' +2025-08-20 22:54:39 - INFO - [29e1bbd2-43fc-421e-8ec6-e4426f910645] Video saved to temporary file: temp_videos/29e1bbd2-43fc-421e-8ec6-e4426f910645.mp4 +2025-08-20 22:54:39 - INFO - [29e1bbd2-43fc-421e-8ec6-e4426f910645] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:54:44 - INFO - [29e1bbd2-43fc-421e-8ec6-e4426f910645] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:54:44 - INFO - [29e1bbd2-43fc-421e-8ec6-e4426f910645] 30 frames saved to temp_videos/29e1bbd2-43fc-421e-8ec6-e4426f910645 +2025-08-20 22:54:45 - INFO - Prompt token length: 3604 +2025-08-20 22:55:01 - INFO - Tokens per second: 42.74904721881862, Peak GPU memory MB: 9378.375 +2025-08-20 22:55:01 - INFO - [29e1bbd2-43fc-421e-8ec6-e4426f910645] Inference time: 21.91 seconds, CPU usage: 39.5%, CPU core utilization: [20.1, 51.4, 21.5, 64.9] +2025-08-20 22:55:01 - INFO - [29e1bbd2-43fc-421e-8ec6-e4426f910645] Cleaned up temporary file: temp_videos/29e1bbd2-43fc-421e-8ec6-e4426f910645.mp4 +2025-08-20 22:55:01 - INFO - [29e1bbd2-43fc-421e-8ec6-e4426f910645] Cleaned up temporary frame directory: temp_videos/29e1bbd2-43fc-421e-8ec6-e4426f910645 +2025-08-20 22:55:01 - INFO - [a180e8ab-6e30-47d1-8754-b84cbf712b15] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_041.mp4' +2025-08-20 22:55:01 - INFO - [a180e8ab-6e30-47d1-8754-b84cbf712b15] Video saved to temporary file: temp_videos/a180e8ab-6e30-47d1-8754-b84cbf712b15.mp4 +2025-08-20 22:55:01 - INFO - [a180e8ab-6e30-47d1-8754-b84cbf712b15] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:55:06 - INFO - [a180e8ab-6e30-47d1-8754-b84cbf712b15] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:55:06 - INFO - [a180e8ab-6e30-47d1-8754-b84cbf712b15] 30 frames saved to temp_videos/a180e8ab-6e30-47d1-8754-b84cbf712b15 +2025-08-20 22:55:07 - INFO - Prompt token length: 3604 +2025-08-20 22:55:23 - INFO - Tokens per second: 43.534909908199374, Peak GPU memory MB: 9378.375 +2025-08-20 22:55:23 - INFO - [a180e8ab-6e30-47d1-8754-b84cbf712b15] Inference time: 21.86 seconds, CPU usage: 40.0%, CPU core utilization: [26.5, 21.5, 90.0, 21.9] +2025-08-20 22:55:23 - INFO - [a180e8ab-6e30-47d1-8754-b84cbf712b15] Cleaned up temporary file: temp_videos/a180e8ab-6e30-47d1-8754-b84cbf712b15.mp4 +2025-08-20 22:55:23 - INFO - [a180e8ab-6e30-47d1-8754-b84cbf712b15] Cleaned up temporary frame directory: temp_videos/a180e8ab-6e30-47d1-8754-b84cbf712b15 +2025-08-20 22:55:23 - INFO - [2fd14dbf-6cab-4d1d-8cfa-9d16543ad180] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_042.mp4' +2025-08-20 22:55:24 - INFO - [2fd14dbf-6cab-4d1d-8cfa-9d16543ad180] Video saved to temporary file: temp_videos/2fd14dbf-6cab-4d1d-8cfa-9d16543ad180.mp4 +2025-08-20 22:55:24 - INFO - [2fd14dbf-6cab-4d1d-8cfa-9d16543ad180] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:55:28 - INFO - [2fd14dbf-6cab-4d1d-8cfa-9d16543ad180] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:55:28 - INFO - [2fd14dbf-6cab-4d1d-8cfa-9d16543ad180] 30 frames saved to temp_videos/2fd14dbf-6cab-4d1d-8cfa-9d16543ad180 +2025-08-20 22:55:29 - INFO - Prompt token length: 3604 +2025-08-20 22:55:46 - INFO - Tokens per second: 42.738544378380034, Peak GPU memory MB: 9378.375 +2025-08-20 22:55:46 - INFO - [2fd14dbf-6cab-4d1d-8cfa-9d16543ad180] Inference time: 22.71 seconds, CPU usage: 39.2%, CPU core utilization: [27.0, 85.7, 21.2, 22.8] +2025-08-20 22:55:46 - INFO - [2fd14dbf-6cab-4d1d-8cfa-9d16543ad180] Cleaned up temporary file: temp_videos/2fd14dbf-6cab-4d1d-8cfa-9d16543ad180.mp4 +2025-08-20 22:55:46 - INFO - [2fd14dbf-6cab-4d1d-8cfa-9d16543ad180] Cleaned up temporary frame directory: temp_videos/2fd14dbf-6cab-4d1d-8cfa-9d16543ad180 +2025-08-20 22:55:46 - INFO - [041c0c6f-29fe-4509-a5e2-a906b10e0b9b] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_042.mp4' +2025-08-20 22:55:46 - INFO - [041c0c6f-29fe-4509-a5e2-a906b10e0b9b] Video saved to temporary file: temp_videos/041c0c6f-29fe-4509-a5e2-a906b10e0b9b.mp4 +2025-08-20 22:55:46 - INFO - [041c0c6f-29fe-4509-a5e2-a906b10e0b9b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:55:51 - INFO - [041c0c6f-29fe-4509-a5e2-a906b10e0b9b] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:55:51 - INFO - [041c0c6f-29fe-4509-a5e2-a906b10e0b9b] 30 frames saved to temp_videos/041c0c6f-29fe-4509-a5e2-a906b10e0b9b +2025-08-20 22:55:51 - INFO - Prompt token length: 3604 +2025-08-20 22:56:09 - INFO - Tokens per second: 42.90742954996482, Peak GPU memory MB: 9378.375 +2025-08-20 22:56:09 - INFO - [041c0c6f-29fe-4509-a5e2-a906b10e0b9b] Inference time: 22.72 seconds, CPU usage: 39.0%, CPU core utilization: [20.2, 51.9, 20.0, 64.1] +2025-08-20 22:56:09 - INFO - [041c0c6f-29fe-4509-a5e2-a906b10e0b9b] Cleaned up temporary file: temp_videos/041c0c6f-29fe-4509-a5e2-a906b10e0b9b.mp4 +2025-08-20 22:56:09 - INFO - [041c0c6f-29fe-4509-a5e2-a906b10e0b9b] Cleaned up temporary frame directory: temp_videos/041c0c6f-29fe-4509-a5e2-a906b10e0b9b +2025-08-20 22:56:09 - INFO - [5e655c1b-8f68-48b8-821b-39192e744ded] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_043.mp4' +2025-08-20 22:56:09 - INFO - [5e655c1b-8f68-48b8-821b-39192e744ded] Video saved to temporary file: temp_videos/5e655c1b-8f68-48b8-821b-39192e744ded.mp4 +2025-08-20 22:56:09 - INFO - [5e655c1b-8f68-48b8-821b-39192e744ded] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:56:14 - INFO - [5e655c1b-8f68-48b8-821b-39192e744ded] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:56:14 - INFO - [5e655c1b-8f68-48b8-821b-39192e744ded] 30 frames saved to temp_videos/5e655c1b-8f68-48b8-821b-39192e744ded +2025-08-20 22:56:15 - INFO - Prompt token length: 3604 +2025-08-20 22:56:33 - INFO - Tokens per second: 42.936896064627625, Peak GPU memory MB: 9378.375 +2025-08-20 22:56:33 - INFO - [5e655c1b-8f68-48b8-821b-39192e744ded] Inference time: 24.11 seconds, CPU usage: 38.7%, CPU core utilization: [64.6, 19.5, 51.1, 19.2] +2025-08-20 22:56:33 - INFO - [5e655c1b-8f68-48b8-821b-39192e744ded] Cleaned up temporary file: temp_videos/5e655c1b-8f68-48b8-821b-39192e744ded.mp4 +2025-08-20 22:56:33 - INFO - [5e655c1b-8f68-48b8-821b-39192e744ded] Cleaned up temporary frame directory: temp_videos/5e655c1b-8f68-48b8-821b-39192e744ded +2025-08-20 22:56:33 - INFO - [4854dfea-d0be-4393-944b-e0ae11c25bb2] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_043.mp4' +2025-08-20 22:56:34 - INFO - [4854dfea-d0be-4393-944b-e0ae11c25bb2] Video saved to temporary file: temp_videos/4854dfea-d0be-4393-944b-e0ae11c25bb2.mp4 +2025-08-20 22:56:34 - INFO - [4854dfea-d0be-4393-944b-e0ae11c25bb2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:56:38 - INFO - [4854dfea-d0be-4393-944b-e0ae11c25bb2] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:56:38 - INFO - [4854dfea-d0be-4393-944b-e0ae11c25bb2] 30 frames saved to temp_videos/4854dfea-d0be-4393-944b-e0ae11c25bb2 +2025-08-20 22:56:39 - INFO - Prompt token length: 3604 +2025-08-20 22:56:57 - INFO - Tokens per second: 42.647811292769354, Peak GPU memory MB: 9378.375 +2025-08-20 22:56:57 - INFO - [4854dfea-d0be-4393-944b-e0ae11c25bb2] Inference time: 24.02 seconds, CPU usage: 38.6%, CPU core utilization: [29.3, 66.2, 19.3, 39.4] +2025-08-20 22:56:57 - INFO - [4854dfea-d0be-4393-944b-e0ae11c25bb2] Cleaned up temporary file: temp_videos/4854dfea-d0be-4393-944b-e0ae11c25bb2.mp4 +2025-08-20 22:56:57 - INFO - [4854dfea-d0be-4393-944b-e0ae11c25bb2] Cleaned up temporary frame directory: temp_videos/4854dfea-d0be-4393-944b-e0ae11c25bb2 +2025-08-20 22:56:58 - INFO - [3ac0d962-5cd6-4a52-b217-201197b1652f] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_044.mp4' +2025-08-20 22:56:58 - INFO - [3ac0d962-5cd6-4a52-b217-201197b1652f] Video saved to temporary file: temp_videos/3ac0d962-5cd6-4a52-b217-201197b1652f.mp4 +2025-08-20 22:56:58 - INFO - [3ac0d962-5cd6-4a52-b217-201197b1652f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:57:03 - INFO - [3ac0d962-5cd6-4a52-b217-201197b1652f] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:57:03 - INFO - [3ac0d962-5cd6-4a52-b217-201197b1652f] 30 frames saved to temp_videos/3ac0d962-5cd6-4a52-b217-201197b1652f +2025-08-20 22:57:03 - INFO - Prompt token length: 3604 +2025-08-20 22:57:23 - INFO - Tokens per second: 42.76047099874616, Peak GPU memory MB: 9378.375 +2025-08-20 22:57:23 - INFO - [3ac0d962-5cd6-4a52-b217-201197b1652f] Inference time: 25.45 seconds, CPU usage: 38.0%, CPU core utilization: [38.5, 39.9, 51.3, 22.5] +2025-08-20 22:57:23 - INFO - [3ac0d962-5cd6-4a52-b217-201197b1652f] Cleaned up temporary file: temp_videos/3ac0d962-5cd6-4a52-b217-201197b1652f.mp4 +2025-08-20 22:57:23 - INFO - [3ac0d962-5cd6-4a52-b217-201197b1652f] Cleaned up temporary frame directory: temp_videos/3ac0d962-5cd6-4a52-b217-201197b1652f +2025-08-20 22:57:23 - INFO - [21610b16-8e9e-489f-b78a-a3cfc6fa85ef] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_044.mp4' +2025-08-20 22:57:23 - INFO - [21610b16-8e9e-489f-b78a-a3cfc6fa85ef] Video saved to temporary file: temp_videos/21610b16-8e9e-489f-b78a-a3cfc6fa85ef.mp4 +2025-08-20 22:57:23 - INFO - [21610b16-8e9e-489f-b78a-a3cfc6fa85ef] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:57:28 - INFO - [21610b16-8e9e-489f-b78a-a3cfc6fa85ef] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:57:28 - INFO - [21610b16-8e9e-489f-b78a-a3cfc6fa85ef] 30 frames saved to temp_videos/21610b16-8e9e-489f-b78a-a3cfc6fa85ef +2025-08-20 22:57:28 - INFO - Prompt token length: 3604 +2025-08-20 22:57:49 - INFO - Tokens per second: 42.59159214178798, Peak GPU memory MB: 9378.375 +2025-08-20 22:57:49 - INFO - [21610b16-8e9e-489f-b78a-a3cfc6fa85ef] Inference time: 25.50 seconds, CPU usage: 38.1%, CPU core utilization: [65.0, 33.4, 35.0, 18.8] +2025-08-20 22:57:49 - INFO - [21610b16-8e9e-489f-b78a-a3cfc6fa85ef] Cleaned up temporary file: temp_videos/21610b16-8e9e-489f-b78a-a3cfc6fa85ef.mp4 +2025-08-20 22:57:49 - INFO - [21610b16-8e9e-489f-b78a-a3cfc6fa85ef] Cleaned up temporary frame directory: temp_videos/21610b16-8e9e-489f-b78a-a3cfc6fa85ef +2025-08-20 22:57:49 - INFO - [feb0c88d-775a-48d3-b753-f84bd9a71c31] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_045.mp4' +2025-08-20 22:57:49 - INFO - [feb0c88d-775a-48d3-b753-f84bd9a71c31] Video saved to temporary file: temp_videos/feb0c88d-775a-48d3-b753-f84bd9a71c31.mp4 +2025-08-20 22:57:49 - INFO - [feb0c88d-775a-48d3-b753-f84bd9a71c31] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:57:54 - INFO - [feb0c88d-775a-48d3-b753-f84bd9a71c31] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:57:54 - INFO - [feb0c88d-775a-48d3-b753-f84bd9a71c31] 30 frames saved to temp_videos/feb0c88d-775a-48d3-b753-f84bd9a71c31 +2025-08-20 22:57:54 - INFO - Prompt token length: 3604 +2025-08-20 22:58:14 - INFO - Tokens per second: 42.3515119462839, Peak GPU memory MB: 9378.375 +2025-08-20 22:58:14 - INFO - [feb0c88d-775a-48d3-b753-f84bd9a71c31] Inference time: 24.73 seconds, CPU usage: 38.5%, CPU core utilization: [63.4, 19.3, 50.9, 20.2] +2025-08-20 22:58:14 - INFO - [feb0c88d-775a-48d3-b753-f84bd9a71c31] Cleaned up temporary file: temp_videos/feb0c88d-775a-48d3-b753-f84bd9a71c31.mp4 +2025-08-20 22:58:14 - INFO - [feb0c88d-775a-48d3-b753-f84bd9a71c31] Cleaned up temporary frame directory: temp_videos/feb0c88d-775a-48d3-b753-f84bd9a71c31 +2025-08-20 22:58:14 - INFO - [4de136b1-8fd4-4d8c-856c-31c8f0fab002] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_045.mp4' +2025-08-20 22:58:14 - INFO - [4de136b1-8fd4-4d8c-856c-31c8f0fab002] Video saved to temporary file: temp_videos/4de136b1-8fd4-4d8c-856c-31c8f0fab002.mp4 +2025-08-20 22:58:14 - INFO - [4de136b1-8fd4-4d8c-856c-31c8f0fab002] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:58:19 - INFO - [4de136b1-8fd4-4d8c-856c-31c8f0fab002] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:58:19 - INFO - [4de136b1-8fd4-4d8c-856c-31c8f0fab002] 30 frames saved to temp_videos/4de136b1-8fd4-4d8c-856c-31c8f0fab002 +2025-08-20 22:58:19 - INFO - Prompt token length: 3604 +2025-08-20 22:58:39 - INFO - Tokens per second: 42.61727911180951, Peak GPU memory MB: 9378.375 +2025-08-20 22:58:39 - INFO - [4de136b1-8fd4-4d8c-856c-31c8f0fab002] Inference time: 24.74 seconds, CPU usage: 38.2%, CPU core utilization: [22.8, 83.6, 19.8, 26.4] +2025-08-20 22:58:39 - INFO - [4de136b1-8fd4-4d8c-856c-31c8f0fab002] Cleaned up temporary file: temp_videos/4de136b1-8fd4-4d8c-856c-31c8f0fab002.mp4 +2025-08-20 22:58:39 - INFO - [4de136b1-8fd4-4d8c-856c-31c8f0fab002] Cleaned up temporary frame directory: temp_videos/4de136b1-8fd4-4d8c-856c-31c8f0fab002 +2025-08-20 22:58:39 - INFO - [087c2a1c-4994-468f-82a5-953eb56936ee] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_046.mp4' +2025-08-20 22:58:39 - INFO - [087c2a1c-4994-468f-82a5-953eb56936ee] Video saved to temporary file: temp_videos/087c2a1c-4994-468f-82a5-953eb56936ee.mp4 +2025-08-20 22:58:39 - INFO - [087c2a1c-4994-468f-82a5-953eb56936ee] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:58:44 - INFO - [087c2a1c-4994-468f-82a5-953eb56936ee] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:58:44 - INFO - [087c2a1c-4994-468f-82a5-953eb56936ee] 30 frames saved to temp_videos/087c2a1c-4994-468f-82a5-953eb56936ee +2025-08-20 22:58:44 - INFO - Prompt token length: 3604 +2025-08-20 22:59:01 - INFO - Tokens per second: 43.249674450395666, Peak GPU memory MB: 9378.375 +2025-08-20 22:59:01 - INFO - [087c2a1c-4994-468f-82a5-953eb56936ee] Inference time: 21.93 seconds, CPU usage: 39.3%, CPU core utilization: [83.8, 19.9, 30.3, 23.2] +2025-08-20 22:59:01 - INFO - [087c2a1c-4994-468f-82a5-953eb56936ee] Cleaned up temporary file: temp_videos/087c2a1c-4994-468f-82a5-953eb56936ee.mp4 +2025-08-20 22:59:01 - INFO - [087c2a1c-4994-468f-82a5-953eb56936ee] Cleaned up temporary frame directory: temp_videos/087c2a1c-4994-468f-82a5-953eb56936ee +2025-08-20 22:59:01 - INFO - [831c6126-0127-4b8a-8156-b89d4bb8dc7e] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_046.mp4' +2025-08-20 22:59:01 - INFO - [831c6126-0127-4b8a-8156-b89d4bb8dc7e] Video saved to temporary file: temp_videos/831c6126-0127-4b8a-8156-b89d4bb8dc7e.mp4 +2025-08-20 22:59:01 - INFO - [831c6126-0127-4b8a-8156-b89d4bb8dc7e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:59:06 - INFO - [831c6126-0127-4b8a-8156-b89d4bb8dc7e] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:59:06 - INFO - [831c6126-0127-4b8a-8156-b89d4bb8dc7e] 30 frames saved to temp_videos/831c6126-0127-4b8a-8156-b89d4bb8dc7e +2025-08-20 22:59:06 - INFO - Prompt token length: 3604 +2025-08-20 22:59:23 - INFO - Tokens per second: 43.059706339265055, Peak GPU memory MB: 9378.375 +2025-08-20 22:59:23 - INFO - [831c6126-0127-4b8a-8156-b89d4bb8dc7e] Inference time: 21.99 seconds, CPU usage: 39.9%, CPU core utilization: [22.2, 66.7, 21.8, 49.2] +2025-08-20 22:59:23 - INFO - [831c6126-0127-4b8a-8156-b89d4bb8dc7e] Cleaned up temporary file: temp_videos/831c6126-0127-4b8a-8156-b89d4bb8dc7e.mp4 +2025-08-20 22:59:23 - INFO - [831c6126-0127-4b8a-8156-b89d4bb8dc7e] Cleaned up temporary frame directory: temp_videos/831c6126-0127-4b8a-8156-b89d4bb8dc7e +2025-08-20 22:59:23 - INFO - [b02cbe9c-034e-44e3-9f5c-f2a3d410de8a] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_047.mp4' +2025-08-20 22:59:23 - INFO - [b02cbe9c-034e-44e3-9f5c-f2a3d410de8a] Video saved to temporary file: temp_videos/b02cbe9c-034e-44e3-9f5c-f2a3d410de8a.mp4 +2025-08-20 22:59:23 - INFO - [b02cbe9c-034e-44e3-9f5c-f2a3d410de8a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:59:28 - INFO - [b02cbe9c-034e-44e3-9f5c-f2a3d410de8a] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:59:28 - INFO - [b02cbe9c-034e-44e3-9f5c-f2a3d410de8a] 30 frames saved to temp_videos/b02cbe9c-034e-44e3-9f5c-f2a3d410de8a +2025-08-20 22:59:28 - INFO - Prompt token length: 3604 +2025-08-20 22:59:46 - INFO - Tokens per second: 42.36511761463495, Peak GPU memory MB: 9378.375 +2025-08-20 22:59:46 - INFO - [b02cbe9c-034e-44e3-9f5c-f2a3d410de8a] Inference time: 23.30 seconds, CPU usage: 39.0%, CPU core utilization: [43.1, 32.7, 57.2, 23.1] +2025-08-20 22:59:46 - INFO - [b02cbe9c-034e-44e3-9f5c-f2a3d410de8a] Cleaned up temporary file: temp_videos/b02cbe9c-034e-44e3-9f5c-f2a3d410de8a.mp4 +2025-08-20 22:59:46 - INFO - [b02cbe9c-034e-44e3-9f5c-f2a3d410de8a] Cleaned up temporary frame directory: temp_videos/b02cbe9c-034e-44e3-9f5c-f2a3d410de8a +2025-08-20 22:59:47 - INFO - [29f20fc9-520a-4863-81a9-106c43a3bc58] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_047.mp4' +2025-08-20 22:59:47 - INFO - [29f20fc9-520a-4863-81a9-106c43a3bc58] Video saved to temporary file: temp_videos/29f20fc9-520a-4863-81a9-106c43a3bc58.mp4 +2025-08-20 22:59:47 - INFO - [29f20fc9-520a-4863-81a9-106c43a3bc58] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 22:59:51 - INFO - [29f20fc9-520a-4863-81a9-106c43a3bc58] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 22:59:51 - INFO - [29f20fc9-520a-4863-81a9-106c43a3bc58] 30 frames saved to temp_videos/29f20fc9-520a-4863-81a9-106c43a3bc58 +2025-08-20 22:59:52 - INFO - Prompt token length: 3604 +2025-08-20 23:00:10 - INFO - Tokens per second: 42.53428786851631, Peak GPU memory MB: 9378.375 +2025-08-20 23:00:10 - INFO - [29f20fc9-520a-4863-81a9-106c43a3bc58] Inference time: 23.26 seconds, CPU usage: 38.7%, CPU core utilization: [22.5, 21.1, 85.9, 25.2] +2025-08-20 23:00:10 - INFO - [29f20fc9-520a-4863-81a9-106c43a3bc58] Cleaned up temporary file: temp_videos/29f20fc9-520a-4863-81a9-106c43a3bc58.mp4 +2025-08-20 23:00:10 - INFO - [29f20fc9-520a-4863-81a9-106c43a3bc58] Cleaned up temporary frame directory: temp_videos/29f20fc9-520a-4863-81a9-106c43a3bc58 +2025-08-20 23:00:10 - INFO - [bb729bf5-39d3-48fb-a99c-1390751762ba] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_048.mp4' +2025-08-20 23:00:10 - INFO - [bb729bf5-39d3-48fb-a99c-1390751762ba] Video saved to temporary file: temp_videos/bb729bf5-39d3-48fb-a99c-1390751762ba.mp4 +2025-08-20 23:00:10 - INFO - [bb729bf5-39d3-48fb-a99c-1390751762ba] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:00:15 - INFO - [bb729bf5-39d3-48fb-a99c-1390751762ba] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:00:15 - INFO - [bb729bf5-39d3-48fb-a99c-1390751762ba] 30 frames saved to temp_videos/bb729bf5-39d3-48fb-a99c-1390751762ba +2025-08-20 23:00:15 - INFO - Prompt token length: 3604 +2025-08-20 23:00:33 - INFO - Tokens per second: 42.81460133759092, Peak GPU memory MB: 9378.375 +2025-08-20 23:00:33 - INFO - [bb729bf5-39d3-48fb-a99c-1390751762ba] Inference time: 22.84 seconds, CPU usage: 39.4%, CPU core utilization: [21.4, 95.4, 20.1, 20.5] +2025-08-20 23:00:33 - INFO - [bb729bf5-39d3-48fb-a99c-1390751762ba] Cleaned up temporary file: temp_videos/bb729bf5-39d3-48fb-a99c-1390751762ba.mp4 +2025-08-20 23:00:33 - INFO - [bb729bf5-39d3-48fb-a99c-1390751762ba] Cleaned up temporary frame directory: temp_videos/bb729bf5-39d3-48fb-a99c-1390751762ba +2025-08-20 23:00:33 - INFO - [3abb0699-5ba4-446a-a054-c5256903a403] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_048.mp4' +2025-08-20 23:00:33 - INFO - [3abb0699-5ba4-446a-a054-c5256903a403] Video saved to temporary file: temp_videos/3abb0699-5ba4-446a-a054-c5256903a403.mp4 +2025-08-20 23:00:33 - INFO - [3abb0699-5ba4-446a-a054-c5256903a403] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:00:38 - INFO - [3abb0699-5ba4-446a-a054-c5256903a403] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:00:38 - INFO - [3abb0699-5ba4-446a-a054-c5256903a403] 30 frames saved to temp_videos/3abb0699-5ba4-446a-a054-c5256903a403 +2025-08-20 23:00:38 - INFO - Prompt token length: 3604 +2025-08-20 23:00:56 - INFO - Tokens per second: 43.10943641242185, Peak GPU memory MB: 9378.375 +2025-08-20 23:00:56 - INFO - [3abb0699-5ba4-446a-a054-c5256903a403] Inference time: 22.69 seconds, CPU usage: 39.3%, CPU core utilization: [38.0, 42.1, 56.6, 20.4] +2025-08-20 23:00:56 - INFO - [3abb0699-5ba4-446a-a054-c5256903a403] Cleaned up temporary file: temp_videos/3abb0699-5ba4-446a-a054-c5256903a403.mp4 +2025-08-20 23:00:56 - INFO - [3abb0699-5ba4-446a-a054-c5256903a403] Cleaned up temporary frame directory: temp_videos/3abb0699-5ba4-446a-a054-c5256903a403 +2025-08-20 23:00:56 - INFO - [d7dc77c0-254f-424f-83f4-ed836f3a91ba] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_049.mp4' +2025-08-20 23:00:56 - INFO - [d7dc77c0-254f-424f-83f4-ed836f3a91ba] Video saved to temporary file: temp_videos/d7dc77c0-254f-424f-83f4-ed836f3a91ba.mp4 +2025-08-20 23:00:56 - INFO - [d7dc77c0-254f-424f-83f4-ed836f3a91ba] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:01:01 - INFO - [d7dc77c0-254f-424f-83f4-ed836f3a91ba] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:01:01 - INFO - [d7dc77c0-254f-424f-83f4-ed836f3a91ba] 30 frames saved to temp_videos/d7dc77c0-254f-424f-83f4-ed836f3a91ba +2025-08-20 23:01:01 - INFO - Prompt token length: 3604 +2025-08-20 23:01:21 - INFO - Tokens per second: 42.50698010505375, Peak GPU memory MB: 9378.375 +2025-08-20 23:01:21 - INFO - [d7dc77c0-254f-424f-83f4-ed836f3a91ba] Inference time: 24.72 seconds, CPU usage: 39.4%, CPU core utilization: [20.8, 74.7, 20.5, 41.6] +2025-08-20 23:01:21 - INFO - [d7dc77c0-254f-424f-83f4-ed836f3a91ba] Cleaned up temporary file: temp_videos/d7dc77c0-254f-424f-83f4-ed836f3a91ba.mp4 +2025-08-20 23:01:21 - INFO - [d7dc77c0-254f-424f-83f4-ed836f3a91ba] Cleaned up temporary frame directory: temp_videos/d7dc77c0-254f-424f-83f4-ed836f3a91ba +2025-08-20 23:01:21 - INFO - [e2dd263a-8774-4cfe-97fa-7a87af0ceb25] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_049.mp4' +2025-08-20 23:01:21 - INFO - [e2dd263a-8774-4cfe-97fa-7a87af0ceb25] Video saved to temporary file: temp_videos/e2dd263a-8774-4cfe-97fa-7a87af0ceb25.mp4 +2025-08-20 23:01:21 - INFO - [e2dd263a-8774-4cfe-97fa-7a87af0ceb25] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:01:26 - INFO - [e2dd263a-8774-4cfe-97fa-7a87af0ceb25] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:01:26 - INFO - [e2dd263a-8774-4cfe-97fa-7a87af0ceb25] 30 frames saved to temp_videos/e2dd263a-8774-4cfe-97fa-7a87af0ceb25 +2025-08-20 23:01:26 - INFO - Prompt token length: 3604 +2025-08-20 23:01:45 - INFO - Tokens per second: 42.78977476040254, Peak GPU memory MB: 9378.375 +2025-08-20 23:01:45 - INFO - [e2dd263a-8774-4cfe-97fa-7a87af0ceb25] Inference time: 24.59 seconds, CPU usage: 38.3%, CPU core utilization: [21.3, 37.6, 18.5, 75.6] +2025-08-20 23:01:45 - INFO - [e2dd263a-8774-4cfe-97fa-7a87af0ceb25] Cleaned up temporary file: temp_videos/e2dd263a-8774-4cfe-97fa-7a87af0ceb25.mp4 +2025-08-20 23:01:45 - INFO - [e2dd263a-8774-4cfe-97fa-7a87af0ceb25] Cleaned up temporary frame directory: temp_videos/e2dd263a-8774-4cfe-97fa-7a87af0ceb25 +2025-08-20 23:01:46 - INFO - [b95c51f4-bcef-4b15-9ea2-79872c805ccc] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_050.mp4' +2025-08-20 23:01:46 - INFO - [b95c51f4-bcef-4b15-9ea2-79872c805ccc] Video saved to temporary file: temp_videos/b95c51f4-bcef-4b15-9ea2-79872c805ccc.mp4 +2025-08-20 23:01:46 - INFO - [b95c51f4-bcef-4b15-9ea2-79872c805ccc] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:01:50 - INFO - [b95c51f4-bcef-4b15-9ea2-79872c805ccc] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:01:50 - INFO - [b95c51f4-bcef-4b15-9ea2-79872c805ccc] 30 frames saved to temp_videos/b95c51f4-bcef-4b15-9ea2-79872c805ccc +2025-08-20 23:01:51 - INFO - Prompt token length: 3604 +2025-08-20 23:02:13 - INFO - Tokens per second: 42.23388544166544, Peak GPU memory MB: 9378.375 +2025-08-20 23:02:13 - INFO - [b95c51f4-bcef-4b15-9ea2-79872c805ccc] Inference time: 26.92 seconds, CPU usage: 37.5%, CPU core utilization: [18.6, 72.9, 19.1, 39.4] +2025-08-20 23:02:13 - INFO - [b95c51f4-bcef-4b15-9ea2-79872c805ccc] Cleaned up temporary file: temp_videos/b95c51f4-bcef-4b15-9ea2-79872c805ccc.mp4 +2025-08-20 23:02:13 - INFO - [b95c51f4-bcef-4b15-9ea2-79872c805ccc] Cleaned up temporary frame directory: temp_videos/b95c51f4-bcef-4b15-9ea2-79872c805ccc +2025-08-20 23:02:13 - INFO - [fb484afe-5231-4034-b421-0c4ee72ee7d0] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_050.mp4' +2025-08-20 23:02:13 - INFO - [fb484afe-5231-4034-b421-0c4ee72ee7d0] Video saved to temporary file: temp_videos/fb484afe-5231-4034-b421-0c4ee72ee7d0.mp4 +2025-08-20 23:02:13 - INFO - [fb484afe-5231-4034-b421-0c4ee72ee7d0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:02:18 - INFO - [fb484afe-5231-4034-b421-0c4ee72ee7d0] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:02:18 - INFO - [fb484afe-5231-4034-b421-0c4ee72ee7d0] 30 frames saved to temp_videos/fb484afe-5231-4034-b421-0c4ee72ee7d0 +2025-08-20 23:02:18 - INFO - Prompt token length: 3604 +2025-08-20 23:02:40 - INFO - Tokens per second: 42.601318461085675, Peak GPU memory MB: 9378.375 +2025-08-20 23:02:40 - INFO - [fb484afe-5231-4034-b421-0c4ee72ee7d0] Inference time: 26.90 seconds, CPU usage: 37.2%, CPU core utilization: [64.1, 32.5, 25.5, 26.8] +2025-08-20 23:02:40 - INFO - [fb484afe-5231-4034-b421-0c4ee72ee7d0] Cleaned up temporary file: temp_videos/fb484afe-5231-4034-b421-0c4ee72ee7d0.mp4 +2025-08-20 23:02:40 - INFO - [fb484afe-5231-4034-b421-0c4ee72ee7d0] Cleaned up temporary frame directory: temp_videos/fb484afe-5231-4034-b421-0c4ee72ee7d0 +2025-08-20 23:02:40 - INFO - [4bbf7cea-443a-4af9-924c-c0c17990ee1b] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_051.mp4' +2025-08-20 23:02:40 - INFO - [4bbf7cea-443a-4af9-924c-c0c17990ee1b] Video saved to temporary file: temp_videos/4bbf7cea-443a-4af9-924c-c0c17990ee1b.mp4 +2025-08-20 23:02:40 - INFO - [4bbf7cea-443a-4af9-924c-c0c17990ee1b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:02:45 - INFO - [4bbf7cea-443a-4af9-924c-c0c17990ee1b] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:02:45 - INFO - [4bbf7cea-443a-4af9-924c-c0c17990ee1b] 30 frames saved to temp_videos/4bbf7cea-443a-4af9-924c-c0c17990ee1b +2025-08-20 23:02:45 - INFO - Prompt token length: 3604 +2025-08-20 23:03:05 - INFO - Tokens per second: 42.34823834581445, Peak GPU memory MB: 9378.375 +2025-08-20 23:03:05 - INFO - [4bbf7cea-443a-4af9-924c-c0c17990ee1b] Inference time: 25.38 seconds, CPU usage: 37.7%, CPU core utilization: [45.4, 60.6, 19.2, 25.5] +2025-08-20 23:03:05 - INFO - [4bbf7cea-443a-4af9-924c-c0c17990ee1b] Cleaned up temporary file: temp_videos/4bbf7cea-443a-4af9-924c-c0c17990ee1b.mp4 +2025-08-20 23:03:05 - INFO - [4bbf7cea-443a-4af9-924c-c0c17990ee1b] Cleaned up temporary frame directory: temp_videos/4bbf7cea-443a-4af9-924c-c0c17990ee1b +2025-08-20 23:03:05 - INFO - [a8181ec3-6deb-4e8c-b3cb-d53838b34226] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_051.mp4' +2025-08-20 23:03:05 - INFO - [a8181ec3-6deb-4e8c-b3cb-d53838b34226] Video saved to temporary file: temp_videos/a8181ec3-6deb-4e8c-b3cb-d53838b34226.mp4 +2025-08-20 23:03:05 - INFO - [a8181ec3-6deb-4e8c-b3cb-d53838b34226] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:03:10 - INFO - [a8181ec3-6deb-4e8c-b3cb-d53838b34226] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:03:10 - INFO - [a8181ec3-6deb-4e8c-b3cb-d53838b34226] 30 frames saved to temp_videos/a8181ec3-6deb-4e8c-b3cb-d53838b34226 +2025-08-20 23:03:10 - INFO - Prompt token length: 3604 +2025-08-20 23:03:31 - INFO - Tokens per second: 42.85916018839002, Peak GPU memory MB: 9378.375 +2025-08-20 23:03:31 - INFO - [a8181ec3-6deb-4e8c-b3cb-d53838b34226] Inference time: 25.26 seconds, CPU usage: 38.3%, CPU core utilization: [67.3, 20.3, 45.2, 20.3] +2025-08-20 23:03:31 - INFO - [a8181ec3-6deb-4e8c-b3cb-d53838b34226] Cleaned up temporary file: temp_videos/a8181ec3-6deb-4e8c-b3cb-d53838b34226.mp4 +2025-08-20 23:03:31 - INFO - [a8181ec3-6deb-4e8c-b3cb-d53838b34226] Cleaned up temporary frame directory: temp_videos/a8181ec3-6deb-4e8c-b3cb-d53838b34226 +2025-08-20 23:03:31 - INFO - [64f9432c-e052-4fb4-99ec-95a03f915291] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_052.mp4' +2025-08-20 23:03:31 - INFO - [64f9432c-e052-4fb4-99ec-95a03f915291] Video saved to temporary file: temp_videos/64f9432c-e052-4fb4-99ec-95a03f915291.mp4 +2025-08-20 23:03:31 - INFO - [64f9432c-e052-4fb4-99ec-95a03f915291] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:03:36 - INFO - [64f9432c-e052-4fb4-99ec-95a03f915291] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:03:36 - INFO - [64f9432c-e052-4fb4-99ec-95a03f915291] 30 frames saved to temp_videos/64f9432c-e052-4fb4-99ec-95a03f915291 +2025-08-20 23:03:36 - INFO - Prompt token length: 3604 +2025-08-20 23:03:54 - INFO - Tokens per second: 42.94569831378426, Peak GPU memory MB: 9378.375 +2025-08-20 23:03:54 - INFO - [64f9432c-e052-4fb4-99ec-95a03f915291] Inference time: 22.75 seconds, CPU usage: 39.3%, CPU core utilization: [21.5, 19.9, 25.0, 90.5] +2025-08-20 23:03:54 - INFO - [64f9432c-e052-4fb4-99ec-95a03f915291] Cleaned up temporary file: temp_videos/64f9432c-e052-4fb4-99ec-95a03f915291.mp4 +2025-08-20 23:03:54 - INFO - [64f9432c-e052-4fb4-99ec-95a03f915291] Cleaned up temporary frame directory: temp_videos/64f9432c-e052-4fb4-99ec-95a03f915291 +2025-08-20 23:03:54 - INFO - [a79715aa-2e90-4a21-890d-b6397fbe2f76] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_052.mp4' +2025-08-20 23:03:54 - INFO - [a79715aa-2e90-4a21-890d-b6397fbe2f76] Video saved to temporary file: temp_videos/a79715aa-2e90-4a21-890d-b6397fbe2f76.mp4 +2025-08-20 23:03:54 - INFO - [a79715aa-2e90-4a21-890d-b6397fbe2f76] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:03:59 - INFO - [a79715aa-2e90-4a21-890d-b6397fbe2f76] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:03:59 - INFO - [a79715aa-2e90-4a21-890d-b6397fbe2f76] 30 frames saved to temp_videos/a79715aa-2e90-4a21-890d-b6397fbe2f76 +2025-08-20 23:03:59 - INFO - Prompt token length: 3604 +2025-08-20 23:04:16 - INFO - Tokens per second: 42.59541770032984, Peak GPU memory MB: 9378.375 +2025-08-20 23:04:16 - INFO - [a79715aa-2e90-4a21-890d-b6397fbe2f76] Inference time: 22.76 seconds, CPU usage: 40.7%, CPU core utilization: [34.6, 31.3, 60.6, 36.2] +2025-08-20 23:04:16 - INFO - [a79715aa-2e90-4a21-890d-b6397fbe2f76] Cleaned up temporary file: temp_videos/a79715aa-2e90-4a21-890d-b6397fbe2f76.mp4 +2025-08-20 23:04:16 - INFO - [a79715aa-2e90-4a21-890d-b6397fbe2f76] Cleaned up temporary frame directory: temp_videos/a79715aa-2e90-4a21-890d-b6397fbe2f76 +2025-08-20 23:04:17 - INFO - [6fd2f5de-b430-4d10-ba73-4c89f882c788] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_053.mp4' +2025-08-20 23:04:17 - INFO - [6fd2f5de-b430-4d10-ba73-4c89f882c788] Video saved to temporary file: temp_videos/6fd2f5de-b430-4d10-ba73-4c89f882c788.mp4 +2025-08-20 23:04:17 - INFO - [6fd2f5de-b430-4d10-ba73-4c89f882c788] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:04:22 - INFO - [6fd2f5de-b430-4d10-ba73-4c89f882c788] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:04:22 - INFO - [6fd2f5de-b430-4d10-ba73-4c89f882c788] 30 frames saved to temp_videos/6fd2f5de-b430-4d10-ba73-4c89f882c788 +2025-08-20 23:04:22 - INFO - Prompt token length: 3604 +2025-08-20 23:04:43 - INFO - Tokens per second: 42.49892289738985, Peak GPU memory MB: 9378.375 +2025-08-20 23:04:43 - INFO - [6fd2f5de-b430-4d10-ba73-4c89f882c788] Inference time: 26.64 seconds, CPU usage: 37.3%, CPU core utilization: [18.9, 19.9, 17.1, 93.3] +2025-08-20 23:04:43 - INFO - [6fd2f5de-b430-4d10-ba73-4c89f882c788] Cleaned up temporary file: temp_videos/6fd2f5de-b430-4d10-ba73-4c89f882c788.mp4 +2025-08-20 23:04:43 - INFO - [6fd2f5de-b430-4d10-ba73-4c89f882c788] Cleaned up temporary frame directory: temp_videos/6fd2f5de-b430-4d10-ba73-4c89f882c788 +2025-08-20 23:04:43 - INFO - [29de0226-ad7b-485d-9240-43b7cc741a9d] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_053.mp4' +2025-08-20 23:04:44 - INFO - [29de0226-ad7b-485d-9240-43b7cc741a9d] Video saved to temporary file: temp_videos/29de0226-ad7b-485d-9240-43b7cc741a9d.mp4 +2025-08-20 23:04:44 - INFO - [29de0226-ad7b-485d-9240-43b7cc741a9d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:04:48 - INFO - [29de0226-ad7b-485d-9240-43b7cc741a9d] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:04:48 - INFO - [29de0226-ad7b-485d-9240-43b7cc741a9d] 30 frames saved to temp_videos/29de0226-ad7b-485d-9240-43b7cc741a9d +2025-08-20 23:04:49 - INFO - Prompt token length: 3604 +2025-08-20 23:05:10 - INFO - Tokens per second: 42.52309268195912, Peak GPU memory MB: 9378.375 +2025-08-20 23:05:10 - INFO - [29de0226-ad7b-485d-9240-43b7cc741a9d] Inference time: 26.74 seconds, CPU usage: 37.4%, CPU core utilization: [18.6, 68.7, 17.1, 45.2] +2025-08-20 23:05:10 - INFO - [29de0226-ad7b-485d-9240-43b7cc741a9d] Cleaned up temporary file: temp_videos/29de0226-ad7b-485d-9240-43b7cc741a9d.mp4 +2025-08-20 23:05:10 - INFO - [29de0226-ad7b-485d-9240-43b7cc741a9d] Cleaned up temporary frame directory: temp_videos/29de0226-ad7b-485d-9240-43b7cc741a9d +2025-08-20 23:05:10 - INFO - [03d6e67f-8e91-4c8d-91f1-73344ccb52a8] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_054.mp4' +2025-08-20 23:05:10 - INFO - [03d6e67f-8e91-4c8d-91f1-73344ccb52a8] Video saved to temporary file: temp_videos/03d6e67f-8e91-4c8d-91f1-73344ccb52a8.mp4 +2025-08-20 23:05:10 - INFO - [03d6e67f-8e91-4c8d-91f1-73344ccb52a8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:05:15 - INFO - [03d6e67f-8e91-4c8d-91f1-73344ccb52a8] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:05:15 - INFO - [03d6e67f-8e91-4c8d-91f1-73344ccb52a8] 30 frames saved to temp_videos/03d6e67f-8e91-4c8d-91f1-73344ccb52a8 +2025-08-20 23:05:16 - INFO - Prompt token length: 3604 +2025-08-20 23:05:33 - INFO - Tokens per second: 43.08725558435238, Peak GPU memory MB: 9378.375 +2025-08-20 23:05:33 - INFO - [03d6e67f-8e91-4c8d-91f1-73344ccb52a8] Inference time: 22.19 seconds, CPU usage: 39.7%, CPU core utilization: [53.2, 21.7, 63.5, 20.3] +2025-08-20 23:05:33 - INFO - [03d6e67f-8e91-4c8d-91f1-73344ccb52a8] Cleaned up temporary file: temp_videos/03d6e67f-8e91-4c8d-91f1-73344ccb52a8.mp4 +2025-08-20 23:05:33 - INFO - [03d6e67f-8e91-4c8d-91f1-73344ccb52a8] Cleaned up temporary frame directory: temp_videos/03d6e67f-8e91-4c8d-91f1-73344ccb52a8 +2025-08-20 23:05:33 - INFO - [033436f8-aa03-4105-bfd4-cf2581b96f24] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_054.mp4' +2025-08-20 23:05:33 - INFO - [033436f8-aa03-4105-bfd4-cf2581b96f24] Video saved to temporary file: temp_videos/033436f8-aa03-4105-bfd4-cf2581b96f24.mp4 +2025-08-20 23:05:33 - INFO - [033436f8-aa03-4105-bfd4-cf2581b96f24] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:05:38 - INFO - [033436f8-aa03-4105-bfd4-cf2581b96f24] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:05:38 - INFO - [033436f8-aa03-4105-bfd4-cf2581b96f24] 30 frames saved to temp_videos/033436f8-aa03-4105-bfd4-cf2581b96f24 +2025-08-20 23:05:38 - INFO - Prompt token length: 3604 +2025-08-20 23:05:55 - INFO - Tokens per second: 43.295534779120004, Peak GPU memory MB: 9378.375 +2025-08-20 23:05:55 - INFO - [033436f8-aa03-4105-bfd4-cf2581b96f24] Inference time: 22.07 seconds, CPU usage: 39.6%, CPU core utilization: [66.3, 21.5, 48.7, 21.8] +2025-08-20 23:05:55 - INFO - [033436f8-aa03-4105-bfd4-cf2581b96f24] Cleaned up temporary file: temp_videos/033436f8-aa03-4105-bfd4-cf2581b96f24.mp4 +2025-08-20 23:05:55 - INFO - [033436f8-aa03-4105-bfd4-cf2581b96f24] Cleaned up temporary frame directory: temp_videos/033436f8-aa03-4105-bfd4-cf2581b96f24 +2025-08-20 23:05:55 - INFO - [39e0d201-6b42-41ad-a42d-21b1adfe9f53] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_055.mp4' +2025-08-20 23:05:55 - INFO - [39e0d201-6b42-41ad-a42d-21b1adfe9f53] Video saved to temporary file: temp_videos/39e0d201-6b42-41ad-a42d-21b1adfe9f53.mp4 +2025-08-20 23:05:55 - INFO - [39e0d201-6b42-41ad-a42d-21b1adfe9f53] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:06:00 - INFO - [39e0d201-6b42-41ad-a42d-21b1adfe9f53] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:06:00 - INFO - [39e0d201-6b42-41ad-a42d-21b1adfe9f53] 30 frames saved to temp_videos/39e0d201-6b42-41ad-a42d-21b1adfe9f53 +2025-08-20 23:06:00 - INFO - Prompt token length: 3604 +2025-08-20 23:06:20 - INFO - Tokens per second: 42.6048713404806, Peak GPU memory MB: 9378.375 +2025-08-20 23:06:20 - INFO - [39e0d201-6b42-41ad-a42d-21b1adfe9f53] Inference time: 24.96 seconds, CPU usage: 38.2%, CPU core utilization: [36.1, 49.3, 43.9, 23.8] +2025-08-20 23:06:20 - INFO - [39e0d201-6b42-41ad-a42d-21b1adfe9f53] Cleaned up temporary file: temp_videos/39e0d201-6b42-41ad-a42d-21b1adfe9f53.mp4 +2025-08-20 23:06:20 - INFO - [39e0d201-6b42-41ad-a42d-21b1adfe9f53] Cleaned up temporary frame directory: temp_videos/39e0d201-6b42-41ad-a42d-21b1adfe9f53 +2025-08-20 23:06:20 - INFO - [3795c6f8-ed71-4ea1-9bee-1a9ebb2fe67f] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_055.mp4' +2025-08-20 23:06:20 - INFO - [3795c6f8-ed71-4ea1-9bee-1a9ebb2fe67f] Video saved to temporary file: temp_videos/3795c6f8-ed71-4ea1-9bee-1a9ebb2fe67f.mp4 +2025-08-20 23:06:20 - INFO - [3795c6f8-ed71-4ea1-9bee-1a9ebb2fe67f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:06:25 - INFO - [3795c6f8-ed71-4ea1-9bee-1a9ebb2fe67f] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:06:25 - INFO - [3795c6f8-ed71-4ea1-9bee-1a9ebb2fe67f] 30 frames saved to temp_videos/3795c6f8-ed71-4ea1-9bee-1a9ebb2fe67f +2025-08-20 23:06:25 - INFO - Prompt token length: 3604 +2025-08-20 23:06:45 - INFO - Tokens per second: 43.15558723881245, Peak GPU memory MB: 9378.375 +2025-08-20 23:06:45 - INFO - [3795c6f8-ed71-4ea1-9bee-1a9ebb2fe67f] Inference time: 24.84 seconds, CPU usage: 38.2%, CPU core utilization: [33.6, 44.5, 22.5, 52.2] +2025-08-20 23:06:45 - INFO - [3795c6f8-ed71-4ea1-9bee-1a9ebb2fe67f] Cleaned up temporary file: temp_videos/3795c6f8-ed71-4ea1-9bee-1a9ebb2fe67f.mp4 +2025-08-20 23:06:45 - INFO - [3795c6f8-ed71-4ea1-9bee-1a9ebb2fe67f] Cleaned up temporary frame directory: temp_videos/3795c6f8-ed71-4ea1-9bee-1a9ebb2fe67f +2025-08-20 23:06:45 - INFO - [3e9a4399-7bb2-44bc-a419-6fbd50ec7dac] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_056.mp4' +2025-08-20 23:06:45 - INFO - [3e9a4399-7bb2-44bc-a419-6fbd50ec7dac] Video saved to temporary file: temp_videos/3e9a4399-7bb2-44bc-a419-6fbd50ec7dac.mp4 +2025-08-20 23:06:45 - INFO - [3e9a4399-7bb2-44bc-a419-6fbd50ec7dac] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:06:50 - INFO - [3e9a4399-7bb2-44bc-a419-6fbd50ec7dac] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:06:50 - INFO - [3e9a4399-7bb2-44bc-a419-6fbd50ec7dac] 30 frames saved to temp_videos/3e9a4399-7bb2-44bc-a419-6fbd50ec7dac +2025-08-20 23:06:50 - INFO - Prompt token length: 3604 +2025-08-20 23:07:12 - INFO - Tokens per second: 42.60317461272888, Peak GPU memory MB: 9378.375 +2025-08-20 23:07:12 - INFO - [3e9a4399-7bb2-44bc-a419-6fbd50ec7dac] Inference time: 26.37 seconds, CPU usage: 37.6%, CPU core utilization: [19.0, 18.8, 17.2, 95.4] +2025-08-20 23:07:12 - INFO - [3e9a4399-7bb2-44bc-a419-6fbd50ec7dac] Cleaned up temporary file: temp_videos/3e9a4399-7bb2-44bc-a419-6fbd50ec7dac.mp4 +2025-08-20 23:07:12 - INFO - [3e9a4399-7bb2-44bc-a419-6fbd50ec7dac] Cleaned up temporary frame directory: temp_videos/3e9a4399-7bb2-44bc-a419-6fbd50ec7dac +2025-08-20 23:07:12 - INFO - [d5af78ae-c0c8-473d-8749-38c5728a5d0d] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_056.mp4' +2025-08-20 23:07:12 - INFO - [d5af78ae-c0c8-473d-8749-38c5728a5d0d] Video saved to temporary file: temp_videos/d5af78ae-c0c8-473d-8749-38c5728a5d0d.mp4 +2025-08-20 23:07:12 - INFO - [d5af78ae-c0c8-473d-8749-38c5728a5d0d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:07:17 - INFO - [d5af78ae-c0c8-473d-8749-38c5728a5d0d] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:07:17 - INFO - [d5af78ae-c0c8-473d-8749-38c5728a5d0d] 30 frames saved to temp_videos/d5af78ae-c0c8-473d-8749-38c5728a5d0d +2025-08-20 23:07:17 - INFO - Prompt token length: 3604 +2025-08-20 23:07:38 - INFO - Tokens per second: 41.82676160274437, Peak GPU memory MB: 9378.375 +2025-08-20 23:07:38 - INFO - [d5af78ae-c0c8-473d-8749-38c5728a5d0d] Inference time: 26.51 seconds, CPU usage: 53.9%, CPU core utilization: [43.5, 65.8, 40.3, 66.1] +2025-08-20 23:07:38 - INFO - [d5af78ae-c0c8-473d-8749-38c5728a5d0d] Cleaned up temporary file: temp_videos/d5af78ae-c0c8-473d-8749-38c5728a5d0d.mp4 +2025-08-20 23:07:38 - INFO - [d5af78ae-c0c8-473d-8749-38c5728a5d0d] Cleaned up temporary frame directory: temp_videos/d5af78ae-c0c8-473d-8749-38c5728a5d0d +2025-08-20 23:07:38 - INFO - [e995f18e-94cc-467e-9d41-d9c9382e8dbf] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_057.mp4' +2025-08-20 23:07:39 - INFO - [e995f18e-94cc-467e-9d41-d9c9382e8dbf] Video saved to temporary file: temp_videos/e995f18e-94cc-467e-9d41-d9c9382e8dbf.mp4 +2025-08-20 23:07:39 - INFO - [e995f18e-94cc-467e-9d41-d9c9382e8dbf] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:07:43 - INFO - [e995f18e-94cc-467e-9d41-d9c9382e8dbf] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:07:43 - INFO - [e995f18e-94cc-467e-9d41-d9c9382e8dbf] 30 frames saved to temp_videos/e995f18e-94cc-467e-9d41-d9c9382e8dbf +2025-08-20 23:07:44 - INFO - Prompt token length: 3604 +2025-08-20 23:08:03 - INFO - Tokens per second: 42.67129610631823, Peak GPU memory MB: 9378.375 +2025-08-20 23:08:03 - INFO - [e995f18e-94cc-467e-9d41-d9c9382e8dbf] Inference time: 25.01 seconds, CPU usage: 38.1%, CPU core utilization: [19.9, 19.9, 17.5, 94.9] +2025-08-20 23:08:03 - INFO - [e995f18e-94cc-467e-9d41-d9c9382e8dbf] Cleaned up temporary file: temp_videos/e995f18e-94cc-467e-9d41-d9c9382e8dbf.mp4 +2025-08-20 23:08:03 - INFO - [e995f18e-94cc-467e-9d41-d9c9382e8dbf] Cleaned up temporary frame directory: temp_videos/e995f18e-94cc-467e-9d41-d9c9382e8dbf +2025-08-20 23:08:04 - INFO - [417d5b8b-60b9-473a-9bbc-3fbcb562a292] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_057.mp4' +2025-08-20 23:08:04 - INFO - [417d5b8b-60b9-473a-9bbc-3fbcb562a292] Video saved to temporary file: temp_videos/417d5b8b-60b9-473a-9bbc-3fbcb562a292.mp4 +2025-08-20 23:08:04 - INFO - [417d5b8b-60b9-473a-9bbc-3fbcb562a292] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:08:09 - INFO - [417d5b8b-60b9-473a-9bbc-3fbcb562a292] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:08:09 - INFO - [417d5b8b-60b9-473a-9bbc-3fbcb562a292] 30 frames saved to temp_videos/417d5b8b-60b9-473a-9bbc-3fbcb562a292 +2025-08-20 23:08:09 - INFO - Prompt token length: 3604 +2025-08-20 23:08:29 - INFO - Tokens per second: 42.26603746275003, Peak GPU memory MB: 9378.375 +2025-08-20 23:08:29 - INFO - [417d5b8b-60b9-473a-9bbc-3fbcb562a292] Inference time: 25.03 seconds, CPU usage: 40.2%, CPU core utilization: [35.2, 34.3, 55.8, 35.3] +2025-08-20 23:08:29 - INFO - [417d5b8b-60b9-473a-9bbc-3fbcb562a292] Cleaned up temporary file: temp_videos/417d5b8b-60b9-473a-9bbc-3fbcb562a292.mp4 +2025-08-20 23:08:29 - INFO - [417d5b8b-60b9-473a-9bbc-3fbcb562a292] Cleaned up temporary frame directory: temp_videos/417d5b8b-60b9-473a-9bbc-3fbcb562a292 +2025-08-20 23:08:29 - INFO - [eca7f4fb-46ca-427e-8cd8-3bb9a01aa521] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_058.mp4' +2025-08-20 23:08:29 - INFO - [eca7f4fb-46ca-427e-8cd8-3bb9a01aa521] Video saved to temporary file: temp_videos/eca7f4fb-46ca-427e-8cd8-3bb9a01aa521.mp4 +2025-08-20 23:08:29 - INFO - [eca7f4fb-46ca-427e-8cd8-3bb9a01aa521] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:08:34 - INFO - [eca7f4fb-46ca-427e-8cd8-3bb9a01aa521] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:08:34 - INFO - [eca7f4fb-46ca-427e-8cd8-3bb9a01aa521] 30 frames saved to temp_videos/eca7f4fb-46ca-427e-8cd8-3bb9a01aa521 +2025-08-20 23:08:34 - INFO - Prompt token length: 3604 +2025-08-20 23:08:53 - INFO - Tokens per second: 43.04006124843271, Peak GPU memory MB: 9378.375 +2025-08-20 23:08:53 - INFO - [eca7f4fb-46ca-427e-8cd8-3bb9a01aa521] Inference time: 24.56 seconds, CPU usage: 39.9%, CPU core utilization: [59.2, 24.2, 40.5, 35.6] +2025-08-20 23:08:53 - INFO - [eca7f4fb-46ca-427e-8cd8-3bb9a01aa521] Cleaned up temporary file: temp_videos/eca7f4fb-46ca-427e-8cd8-3bb9a01aa521.mp4 +2025-08-20 23:08:53 - INFO - [eca7f4fb-46ca-427e-8cd8-3bb9a01aa521] Cleaned up temporary frame directory: temp_videos/eca7f4fb-46ca-427e-8cd8-3bb9a01aa521 +2025-08-20 23:08:54 - INFO - [acf5c753-08fa-4423-a1d5-9b4da15087c1] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_058.mp4' +2025-08-20 23:08:54 - INFO - [acf5c753-08fa-4423-a1d5-9b4da15087c1] Video saved to temporary file: temp_videos/acf5c753-08fa-4423-a1d5-9b4da15087c1.mp4 +2025-08-20 23:08:54 - INFO - [acf5c753-08fa-4423-a1d5-9b4da15087c1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:08:58 - INFO - [acf5c753-08fa-4423-a1d5-9b4da15087c1] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:08:58 - INFO - [acf5c753-08fa-4423-a1d5-9b4da15087c1] 30 frames saved to temp_videos/acf5c753-08fa-4423-a1d5-9b4da15087c1 +2025-08-20 23:08:59 - INFO - Prompt token length: 3604 +2025-08-20 23:09:18 - INFO - Tokens per second: 43.234796973891356, Peak GPU memory MB: 9378.375 +2025-08-20 23:09:18 - INFO - [acf5c753-08fa-4423-a1d5-9b4da15087c1] Inference time: 24.54 seconds, CPU usage: 38.5%, CPU core utilization: [42.4, 19.0, 72.2, 20.2] +2025-08-20 23:09:18 - INFO - [acf5c753-08fa-4423-a1d5-9b4da15087c1] Cleaned up temporary file: temp_videos/acf5c753-08fa-4423-a1d5-9b4da15087c1.mp4 +2025-08-20 23:09:18 - INFO - [acf5c753-08fa-4423-a1d5-9b4da15087c1] Cleaned up temporary frame directory: temp_videos/acf5c753-08fa-4423-a1d5-9b4da15087c1 +2025-08-20 23:09:18 - INFO - [23ceb8f5-fc21-4854-b1b9-d50d6432411c] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_059.mp4' +2025-08-20 23:09:18 - INFO - [23ceb8f5-fc21-4854-b1b9-d50d6432411c] Video saved to temporary file: temp_videos/23ceb8f5-fc21-4854-b1b9-d50d6432411c.mp4 +2025-08-20 23:09:18 - INFO - [23ceb8f5-fc21-4854-b1b9-d50d6432411c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:09:23 - INFO - [23ceb8f5-fc21-4854-b1b9-d50d6432411c] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:09:23 - INFO - [23ceb8f5-fc21-4854-b1b9-d50d6432411c] 30 frames saved to temp_videos/23ceb8f5-fc21-4854-b1b9-d50d6432411c +2025-08-20 23:09:24 - INFO - Prompt token length: 3604 +2025-08-20 23:09:42 - INFO - Tokens per second: 42.14474474836568, Peak GPU memory MB: 9378.375 +2025-08-20 23:09:42 - INFO - [23ceb8f5-fc21-4854-b1b9-d50d6432411c] Inference time: 23.65 seconds, CPU usage: 40.2%, CPU core utilization: [37.4, 26.5, 69.8, 26.9] +2025-08-20 23:09:42 - INFO - [23ceb8f5-fc21-4854-b1b9-d50d6432411c] Cleaned up temporary file: temp_videos/23ceb8f5-fc21-4854-b1b9-d50d6432411c.mp4 +2025-08-20 23:09:42 - INFO - [23ceb8f5-fc21-4854-b1b9-d50d6432411c] Cleaned up temporary frame directory: temp_videos/23ceb8f5-fc21-4854-b1b9-d50d6432411c diff --git a/API_Transformers/logs/LFM2-VL-1.6B/20250820_231154.log b/API_Transformers/logs/LFM2-VL-1.6B/20250820_231154.log new file mode 100644 index 0000000000000000000000000000000000000000..e1ce88162b8acc5e6be416b1388176be3063aecd --- /dev/null +++ b/API_Transformers/logs/LFM2-VL-1.6B/20250820_231154.log @@ -0,0 +1,4 @@ +2025-08-20 23:11:54 - INFO - Loading model: LiquidAI/LFM2-VL-1.6B +2025-08-20 23:11:55 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). 
+2025-08-20 23:12:02 - INFO - Model loaded in 7.77 seconds +2025-08-20 23:12:02 - INFO - GPU Memory Usage after model load: 3023.64 MB diff --git a/API_Transformers/logs/LFM2-VL-1.6B/20250820_231714.log b/API_Transformers/logs/LFM2-VL-1.6B/20250820_231714.log new file mode 100644 index 0000000000000000000000000000000000000000..acc303c5a6a0b11903d2ccb43ef6b2625117f5ca --- /dev/null +++ b/API_Transformers/logs/LFM2-VL-1.6B/20250820_231714.log @@ -0,0 +1,67 @@ +2025-08-20 23:17:14 - INFO - Loading model: LiquidAI/LFM2-VL-1.6B +2025-08-20 23:17:16 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-20 23:17:22 - INFO - Model loaded in 7.52 seconds +2025-08-20 23:17:22 - INFO - GPU Memory Usage after model load: 3023.64 MB +2025-08-20 23:18:11 - INFO - [7395856c-f721-471f-85da-b8268191bd53] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_001.mp4' +2025-08-20 23:18:11 - INFO - [7395856c-f721-471f-85da-b8268191bd53] Video saved to temporary file: temp_videos/7395856c-f721-471f-85da-b8268191bd53.mp4 +2025-08-20 23:18:11 - INFO - [7395856c-f721-471f-85da-b8268191bd53] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:18:16 - INFO - [7395856c-f721-471f-85da-b8268191bd53] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:18:16 - INFO - [7395856c-f721-471f-85da-b8268191bd53] 30 frames saved to temp_videos/7395856c-f721-471f-85da-b8268191bd53 +2025-08-20 23:18:16 - INFO - Prompt token length: 3604 +2025-08-20 23:18:36 - INFO - Tokens per second: 43.071821438037745, Peak GPU memory MB: 9378.375 +2025-08-20 23:18:36 - INFO - [7395856c-f721-471f-85da-b8268191bd53] Inference time: 24.96 seconds, CPU usage: 27.5%, CPU core utilization: [19.8, 26.4, 21.3, 42.5] +2025-08-20 23:18:36 - INFO - [7395856c-f721-471f-85da-b8268191bd53] Cleaned up temporary frame directory: temp_videos/7395856c-f721-471f-85da-b8268191bd53 +2025-08-20 23:18:36 - INFO - [d8cb0a25-8bb0-4b63-9b5a-30f5dc54fd7a] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_001.mp4' +2025-08-20 23:18:36 - INFO - [d8cb0a25-8bb0-4b63-9b5a-30f5dc54fd7a] Video saved to temporary file: temp_videos/d8cb0a25-8bb0-4b63-9b5a-30f5dc54fd7a.mp4 +2025-08-20 23:18:36 - INFO - [d8cb0a25-8bb0-4b63-9b5a-30f5dc54fd7a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:18:41 - INFO - [d8cb0a25-8bb0-4b63-9b5a-30f5dc54fd7a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:18:41 - INFO - [d8cb0a25-8bb0-4b63-9b5a-30f5dc54fd7a] 30 frames saved to temp_videos/d8cb0a25-8bb0-4b63-9b5a-30f5dc54fd7a +2025-08-20 23:18:41 - INFO - Prompt token length: 3604 +2025-08-20 23:19:01 - INFO - Tokens per second: 43.41840127709154, Peak GPU memory MB: 9378.375 +2025-08-20 23:19:01 - INFO - [d8cb0a25-8bb0-4b63-9b5a-30f5dc54fd7a] Inference time: 24.87 seconds, CPU usage: 38.8%, CPU core utilization: [32.1, 40.6, 19.5, 63.0] +2025-08-20 23:19:01 - INFO - [d8cb0a25-8bb0-4b63-9b5a-30f5dc54fd7a] Cleaned up temporary frame directory: temp_videos/d8cb0a25-8bb0-4b63-9b5a-30f5dc54fd7a +2025-08-20 23:20:04 - INFO - [9679be14-bd46-4299-ad82-b4bab2ae763c] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_001.mp4' +2025-08-20 23:20:04 - INFO - [9679be14-bd46-4299-ad82-b4bab2ae763c] Video saved to temporary file: temp_videos/9679be14-bd46-4299-ad82-b4bab2ae763c.mp4 +2025-08-20 23:20:04 - INFO - [9679be14-bd46-4299-ad82-b4bab2ae763c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:20:08 - INFO - [9679be14-bd46-4299-ad82-b4bab2ae763c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:20:08 - INFO - [9679be14-bd46-4299-ad82-b4bab2ae763c] 30 frames saved to temp_videos/9679be14-bd46-4299-ad82-b4bab2ae763c +2025-08-20 23:20:09 - INFO - Prompt token length: 3604 +2025-08-20 23:20:28 - INFO - Tokens per second: 43.04488796653849, Peak GPU memory MB: 9378.375 +2025-08-20 23:20:28 - INFO - [9679be14-bd46-4299-ad82-b4bab2ae763c] Inference time: 24.78 seconds, CPU usage: 14.3%, CPU core utilization: [8.6, 10.9, 9.3, 28.5] +2025-08-20 23:20:28 - INFO - [9679be14-bd46-4299-ad82-b4bab2ae763c] Cleaned up temporary frame directory: temp_videos/9679be14-bd46-4299-ad82-b4bab2ae763c +2025-08-20 23:20:28 - INFO - [3d9610ba-18fc-4512-876c-62775d568781] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_001.mp4' +2025-08-20 23:20:28 - INFO - [3d9610ba-18fc-4512-876c-62775d568781] Video saved to temporary file: temp_videos/3d9610ba-18fc-4512-876c-62775d568781.mp4 +2025-08-20 23:20:28 - INFO - [3d9610ba-18fc-4512-876c-62775d568781] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:20:33 - INFO - [3d9610ba-18fc-4512-876c-62775d568781] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:20:33 - INFO - [3d9610ba-18fc-4512-876c-62775d568781] 30 frames saved to temp_videos/3d9610ba-18fc-4512-876c-62775d568781 +2025-08-20 23:20:34 - INFO - Prompt token length: 3604 +2025-08-20 23:20:53 - INFO - Tokens per second: 43.20754976197641, Peak GPU memory MB: 9378.375 +2025-08-20 23:20:53 - INFO - [3d9610ba-18fc-4512-876c-62775d568781] Inference time: 24.83 seconds, CPU usage: 38.8%, CPU core utilization: [20.3, 39.8, 19.5, 75.4] +2025-08-20 23:20:53 - INFO - [3d9610ba-18fc-4512-876c-62775d568781] Cleaned up temporary frame directory: temp_videos/3d9610ba-18fc-4512-876c-62775d568781 +2025-08-20 23:20:53 - INFO - [91f8f3d5-15de-406b-834c-7aa4204cb5de] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_002.mp4' +2025-08-20 23:20:53 - INFO - [91f8f3d5-15de-406b-834c-7aa4204cb5de] Video saved to temporary file: temp_videos/91f8f3d5-15de-406b-834c-7aa4204cb5de.mp4 +2025-08-20 23:20:53 - INFO - [91f8f3d5-15de-406b-834c-7aa4204cb5de] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:20:58 - INFO - [91f8f3d5-15de-406b-834c-7aa4204cb5de] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:20:58 - INFO - [91f8f3d5-15de-406b-834c-7aa4204cb5de] 30 frames saved to temp_videos/91f8f3d5-15de-406b-834c-7aa4204cb5de +2025-08-20 23:20:58 - INFO - Prompt token length: 3604 +2025-08-20 23:21:21 - INFO - Tokens per second: 38.828257610417374, Peak GPU memory MB: 9378.375 +2025-08-20 23:21:21 - INFO - [91f8f3d5-15de-406b-834c-7aa4204cb5de] Inference time: 27.69 seconds, CPU usage: 54.6%, CPU core utilization: [46.3, 65.5, 40.6, 66.1] +2025-08-20 23:21:21 - INFO - [91f8f3d5-15de-406b-834c-7aa4204cb5de] Cleaned up temporary frame directory: temp_videos/91f8f3d5-15de-406b-834c-7aa4204cb5de +2025-08-20 23:21:21 - INFO - [9f1dd041-e69e-4183-8efa-f61fd08559c2] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_002.mp4' +2025-08-20 23:21:21 - INFO - [9f1dd041-e69e-4183-8efa-f61fd08559c2] Video saved to temporary file: temp_videos/9f1dd041-e69e-4183-8efa-f61fd08559c2.mp4 +2025-08-20 23:21:21 - INFO - [9f1dd041-e69e-4183-8efa-f61fd08559c2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:21:29 - INFO - [9f1dd041-e69e-4183-8efa-f61fd08559c2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:21:29 - INFO - [9f1dd041-e69e-4183-8efa-f61fd08559c2] 30 frames saved to temp_videos/9f1dd041-e69e-4183-8efa-f61fd08559c2 +2025-08-20 23:21:29 - INFO - Prompt token length: 3604 +2025-08-20 23:21:51 - INFO - Tokens per second: 43.03845068270847, Peak GPU memory MB: 9378.375 +2025-08-20 23:21:51 - INFO - [9f1dd041-e69e-4183-8efa-f61fd08559c2] Inference time: 29.71 seconds, CPU usage: 53.7%, CPU core utilization: [39.4, 55.5, 81.5, 38.5] +2025-08-20 23:21:51 - INFO - [9f1dd041-e69e-4183-8efa-f61fd08559c2] Cleaned up temporary frame directory: temp_videos/9f1dd041-e69e-4183-8efa-f61fd08559c2 +2025-08-20 23:21:51 - INFO - [827dbb61-d685-495c-883d-067090aa9e49] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_003.mp4' +2025-08-20 23:21:51 - INFO - [827dbb61-d685-495c-883d-067090aa9e49] Video saved to temporary file: temp_videos/827dbb61-d685-495c-883d-067090aa9e49.mp4 +2025-08-20 23:21:51 - INFO - [827dbb61-d685-495c-883d-067090aa9e49] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:21:56 - INFO - [827dbb61-d685-495c-883d-067090aa9e49] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:21:56 - INFO - [827dbb61-d685-495c-883d-067090aa9e49] 30 frames saved to temp_videos/827dbb61-d685-495c-883d-067090aa9e49 +2025-08-20 23:21:56 - INFO - Prompt token length: 3604 +2025-08-20 23:22:13 - INFO - Tokens per second: 43.19212817813234, Peak GPU memory MB: 9378.375 +2025-08-20 23:22:13 - INFO - [827dbb61-d685-495c-883d-067090aa9e49] Inference time: 22.10 seconds, CPU usage: 40.3%, CPU core utilization: [22.0, 88.1, 21.6, 29.3] +2025-08-20 23:22:13 - INFO - [827dbb61-d685-495c-883d-067090aa9e49] Cleaned up temporary frame directory: temp_videos/827dbb61-d685-495c-883d-067090aa9e49 diff --git a/API_Transformers/logs/LFM2-VL-1.6B/20250820_232316.log b/API_Transformers/logs/LFM2-VL-1.6B/20250820_232316.log new file mode 100644 index 0000000000000000000000000000000000000000..0b50148baa503348748515fbbfcd6b989eb5aa33 --- /dev/null +++ b/API_Transformers/logs/LFM2-VL-1.6B/20250820_232316.log @@ -0,0 +1,24 @@ +2025-08-20 23:23:16 - INFO - Loading model: LiquidAI/LFM2-VL-1.6B +2025-08-20 23:23:18 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-20 23:23:24 - INFO - Model loaded in 7.69 seconds +2025-08-20 23:23:24 - INFO - GPU Memory Usage after model load: 3023.64 MB +2025-08-20 23:23:36 - INFO - [dbbd5e6a-91d4-4c60-91ae-6b6d212302e1] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_001.mp4' +2025-08-20 23:23:36 - INFO - [dbbd5e6a-91d4-4c60-91ae-6b6d212302e1] Video saved to temporary file: temp_videos/dbbd5e6a-91d4-4c60-91ae-6b6d212302e1.mp4 +2025-08-20 23:23:36 - INFO - [dbbd5e6a-91d4-4c60-91ae-6b6d212302e1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:23:44 - INFO - [dbbd5e6a-91d4-4c60-91ae-6b6d212302e1] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:23:44 - INFO - [dbbd5e6a-91d4-4c60-91ae-6b6d212302e1] 30 frames saved to temp_videos/dbbd5e6a-91d4-4c60-91ae-6b6d212302e1 +2025-08-20 23:23:44 - INFO - Prompt token length: 3604 +2025-08-20 23:24:04 - INFO - Tokens per second: 42.609188518901775, Peak GPU memory MB: 9378.375 +2025-08-20 23:24:04 - INFO - [dbbd5e6a-91d4-4c60-91ae-6b6d212302e1] Inference time: 28.08 seconds, CPU usage: 61.6%, CPU core utilization: [56.9, 67.6, 54.1, 67.9] +2025-08-20 23:24:04 - INFO - [dbbd5e6a-91d4-4c60-91ae-6b6d212302e1] Cleaned up temporary file: temp_videos/dbbd5e6a-91d4-4c60-91ae-6b6d212302e1.mp4 +2025-08-20 23:24:04 - INFO - [dbbd5e6a-91d4-4c60-91ae-6b6d212302e1] Cleaned up temporary frame directory: temp_videos/dbbd5e6a-91d4-4c60-91ae-6b6d212302e1 +2025-08-20 23:24:04 - INFO - [2750fe67-230a-46a2-87b9-928b60a2a3d4] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: 'sample_part_002.mp4' +2025-08-20 23:24:04 - INFO - [2750fe67-230a-46a2-87b9-928b60a2a3d4] Video saved to temporary file: temp_videos/2750fe67-230a-46a2-87b9-928b60a2a3d4.mp4 +2025-08-20 23:24:04 - INFO - [2750fe67-230a-46a2-87b9-928b60a2a3d4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:24:09 - INFO - [2750fe67-230a-46a2-87b9-928b60a2a3d4] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:24:09 - INFO - [2750fe67-230a-46a2-87b9-928b60a2a3d4] 30 frames saved to temp_videos/2750fe67-230a-46a2-87b9-928b60a2a3d4 +2025-08-20 23:24:10 - INFO - Prompt token length: 3604 +2025-08-20 23:24:32 - INFO - Tokens per second: 38.75598021320056, Peak GPU memory MB: 9378.375 +2025-08-20 23:24:32 - INFO - [2750fe67-230a-46a2-87b9-928b60a2a3d4] Inference time: 27.76 seconds, CPU usage: 76.2%, CPU core utilization: [78.9, 78.2, 72.8, 75.0] +2025-08-20 23:24:32 - INFO - [2750fe67-230a-46a2-87b9-928b60a2a3d4] Cleaned up temporary file: temp_videos/2750fe67-230a-46a2-87b9-928b60a2a3d4.mp4 +2025-08-20 23:24:32 - INFO - [2750fe67-230a-46a2-87b9-928b60a2a3d4] Cleaned up temporary frame directory: temp_videos/2750fe67-230a-46a2-87b9-928b60a2a3d4 diff --git a/API_Transformers/logs/LFM2-VL-1.6B/20250820_232542.log b/API_Transformers/logs/LFM2-VL-1.6B/20250820_232542.log new file mode 100644 index 0000000000000000000000000000000000000000..732c9f844906d99553c68e2207ffeed08075f8d9 --- /dev/null +++ b/API_Transformers/logs/LFM2-VL-1.6B/20250820_232542.log @@ -0,0 +1,130 @@ +2025-08-20 23:25:42 - INFO - Loading model: LiquidAI/LFM2-VL-1.6B +2025-08-20 23:25:44 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). 
+2025-08-20 23:25:50 - INFO - Model loaded in 7.25 seconds +2025-08-20 23:25:50 - INFO - GPU Memory Usage after model load: 3023.64 MB +2025-08-20 23:26:55 - INFO - [e83cd3f1-609c-4419-b86e-463266ac54ce] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_001.mp4' +2025-08-20 23:26:55 - INFO - [e83cd3f1-609c-4419-b86e-463266ac54ce] Video saved to temporary file: temp_videos/e83cd3f1-609c-4419-b86e-463266ac54ce.mp4 +2025-08-20 23:26:55 - INFO - [e83cd3f1-609c-4419-b86e-463266ac54ce] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:27:00 - INFO - [e83cd3f1-609c-4419-b86e-463266ac54ce] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:27:00 - INFO - [e83cd3f1-609c-4419-b86e-463266ac54ce] 30 frames saved to temp_videos/e83cd3f1-609c-4419-b86e-463266ac54ce +2025-08-20 23:27:00 - INFO - Prompt token length: 3604 +2025-08-20 23:27:20 - INFO - Tokens per second: 43.03910315479847, Peak GPU memory MB: 9378.375 +2025-08-20 23:27:20 - INFO - [e83cd3f1-609c-4419-b86e-463266ac54ce] Inference time: 25.01 seconds, CPU usage: 40.1%, CPU core utilization: [36.1, 41.1, 37.2, 46.1] +2025-08-20 23:27:20 - INFO - [e83cd3f1-609c-4419-b86e-463266ac54ce] Cleaned up temporary frame directory: temp_videos/e83cd3f1-609c-4419-b86e-463266ac54ce +2025-08-20 23:27:20 - INFO - [09fa6c2e-50b5-4c0a-ab72-c399a68e3b19] Received new video inference request. 
Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_002.mp4' +2025-08-20 23:27:20 - INFO - [09fa6c2e-50b5-4c0a-ab72-c399a68e3b19] Video saved to temporary file: temp_videos/09fa6c2e-50b5-4c0a-ab72-c399a68e3b19.mp4 +2025-08-20 23:27:20 - INFO - [09fa6c2e-50b5-4c0a-ab72-c399a68e3b19] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:27:25 - INFO - [09fa6c2e-50b5-4c0a-ab72-c399a68e3b19] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:27:25 - INFO - [09fa6c2e-50b5-4c0a-ab72-c399a68e3b19] 30 frames saved to temp_videos/09fa6c2e-50b5-4c0a-ab72-c399a68e3b19 +2025-08-20 23:27:25 - INFO - Prompt token length: 3604 +2025-08-20 23:27:47 - INFO - Tokens per second: 42.95401647014546, Peak GPU memory MB: 9378.375 +2025-08-20 23:27:47 - INFO - [09fa6c2e-50b5-4c0a-ab72-c399a68e3b19] Inference time: 27.03 seconds, CPU usage: 39.6%, CPU core utilization: [61.4, 39.9, 35.3, 21.8] +2025-08-20 23:27:47 - INFO - [09fa6c2e-50b5-4c0a-ab72-c399a68e3b19] Cleaned up temporary frame directory: temp_videos/09fa6c2e-50b5-4c0a-ab72-c399a68e3b19 +2025-08-20 23:27:47 - INFO - [d2c64140-0f5b-4e4a-83b7-feabd7c4323d] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_003.mp4' +2025-08-20 23:27:47 - INFO - [d2c64140-0f5b-4e4a-83b7-feabd7c4323d] Video saved to temporary file: temp_videos/d2c64140-0f5b-4e4a-83b7-feabd7c4323d.mp4 +2025-08-20 23:27:47 - INFO - [d2c64140-0f5b-4e4a-83b7-feabd7c4323d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:27:52 - INFO - [d2c64140-0f5b-4e4a-83b7-feabd7c4323d] Extracted 30 frames successfully. 
Saving to temporary files... +2025-08-20 23:27:52 - INFO - [d2c64140-0f5b-4e4a-83b7-feabd7c4323d] 30 frames saved to temp_videos/d2c64140-0f5b-4e4a-83b7-feabd7c4323d +2025-08-20 23:27:52 - INFO - Prompt token length: 3604 +2025-08-20 23:28:09 - INFO - Tokens per second: 43.51874005006489, Peak GPU memory MB: 9378.375 +2025-08-20 23:28:09 - INFO - [d2c64140-0f5b-4e4a-83b7-feabd7c4323d] Inference time: 22.02 seconds, CPU usage: 39.8%, CPU core utilization: [81.6, 22.0, 31.3, 24.0] +2025-08-20 23:28:09 - INFO - [d2c64140-0f5b-4e4a-83b7-feabd7c4323d] Cleaned up temporary frame directory: temp_videos/d2c64140-0f5b-4e4a-83b7-feabd7c4323d +2025-08-20 23:28:09 - INFO - [d27c71e9-88ae-44b1-834e-64d54e1645f9] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_004.mp4' +2025-08-20 23:28:09 - INFO - [d27c71e9-88ae-44b1-834e-64d54e1645f9] Video saved to temporary file: temp_videos/d27c71e9-88ae-44b1-834e-64d54e1645f9.mp4 +2025-08-20 23:28:09 - INFO - [d27c71e9-88ae-44b1-834e-64d54e1645f9] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:28:14 - INFO - [d27c71e9-88ae-44b1-834e-64d54e1645f9] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:28:14 - INFO - [d27c71e9-88ae-44b1-834e-64d54e1645f9] 30 frames saved to temp_videos/d27c71e9-88ae-44b1-834e-64d54e1645f9 +2025-08-20 23:28:14 - INFO - Prompt token length: 3604 +2025-08-20 23:28:32 - INFO - Tokens per second: 42.80125306128213, Peak GPU memory MB: 9378.375 +2025-08-20 23:28:32 - INFO - [d27c71e9-88ae-44b1-834e-64d54e1645f9] Inference time: 22.93 seconds, CPU usage: 39.8%, CPU core utilization: [19.9, 21.9, 21.1, 96.1] +2025-08-20 23:28:32 - INFO - [d27c71e9-88ae-44b1-834e-64d54e1645f9] Cleaned up temporary frame directory: temp_videos/d27c71e9-88ae-44b1-834e-64d54e1645f9 +2025-08-20 23:28:32 - INFO - [16164179-73f7-4aa7-a42b-e6453e0f48af] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_005.mp4' +2025-08-20 23:28:32 - INFO - [16164179-73f7-4aa7-a42b-e6453e0f48af] Video saved to temporary file: temp_videos/16164179-73f7-4aa7-a42b-e6453e0f48af.mp4 +2025-08-20 23:28:32 - INFO - [16164179-73f7-4aa7-a42b-e6453e0f48af] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:28:37 - INFO - [16164179-73f7-4aa7-a42b-e6453e0f48af] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:28:37 - INFO - [16164179-73f7-4aa7-a42b-e6453e0f48af] 30 frames saved to temp_videos/16164179-73f7-4aa7-a42b-e6453e0f48af +2025-08-20 23:28:37 - INFO - Prompt token length: 3604 +2025-08-20 23:28:57 - INFO - Tokens per second: 42.835329663650555, Peak GPU memory MB: 9378.375 +2025-08-20 23:28:57 - INFO - [16164179-73f7-4aa7-a42b-e6453e0f48af] Inference time: 24.90 seconds, CPU usage: 38.4%, CPU core utilization: [21.2, 37.7, 18.6, 76.1] +2025-08-20 23:28:57 - INFO - [16164179-73f7-4aa7-a42b-e6453e0f48af] Cleaned up temporary frame directory: temp_videos/16164179-73f7-4aa7-a42b-e6453e0f48af +2025-08-20 23:28:57 - INFO - [371792c3-0d46-4934-8417-8fb6f5b7e4c2] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_006.mp4' +2025-08-20 23:28:57 - INFO - [371792c3-0d46-4934-8417-8fb6f5b7e4c2] Video saved to temporary file: temp_videos/371792c3-0d46-4934-8417-8fb6f5b7e4c2.mp4 +2025-08-20 23:28:57 - INFO - [371792c3-0d46-4934-8417-8fb6f5b7e4c2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:29:02 - INFO - [371792c3-0d46-4934-8417-8fb6f5b7e4c2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:29:02 - INFO - [371792c3-0d46-4934-8417-8fb6f5b7e4c2] 30 frames saved to temp_videos/371792c3-0d46-4934-8417-8fb6f5b7e4c2 +2025-08-20 23:29:02 - INFO - Prompt token length: 3604 +2025-08-20 23:29:21 - INFO - Tokens per second: 43.349566710658124, Peak GPU memory MB: 9378.375 +2025-08-20 23:29:21 - INFO - [371792c3-0d46-4934-8417-8fb6f5b7e4c2] Inference time: 24.44 seconds, CPU usage: 39.0%, CPU core utilization: [45.2, 19.7, 70.7, 20.1] +2025-08-20 23:29:21 - INFO - [371792c3-0d46-4934-8417-8fb6f5b7e4c2] Cleaned up temporary frame directory: temp_videos/371792c3-0d46-4934-8417-8fb6f5b7e4c2 +2025-08-20 23:29:21 - INFO - [fe4d9541-064c-4c8d-bb08-c1d347420c33] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_007.mp4' +2025-08-20 23:29:21 - INFO - [fe4d9541-064c-4c8d-bb08-c1d347420c33] Video saved to temporary file: temp_videos/fe4d9541-064c-4c8d-bb08-c1d347420c33.mp4 +2025-08-20 23:29:21 - INFO - [fe4d9541-064c-4c8d-bb08-c1d347420c33] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:29:26 - INFO - [fe4d9541-064c-4c8d-bb08-c1d347420c33] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:29:26 - INFO - [fe4d9541-064c-4c8d-bb08-c1d347420c33] 30 frames saved to temp_videos/fe4d9541-064c-4c8d-bb08-c1d347420c33 +2025-08-20 23:29:27 - INFO - Prompt token length: 3604 +2025-08-20 23:29:43 - INFO - Tokens per second: 43.652183023369325, Peak GPU memory MB: 9378.375 +2025-08-20 23:29:43 - INFO - [fe4d9541-064c-4c8d-bb08-c1d347420c33] Inference time: 21.99 seconds, CPU usage: 39.8%, CPU core utilization: [80.7, 22.9, 34.6, 21.1] +2025-08-20 23:29:43 - INFO - [fe4d9541-064c-4c8d-bb08-c1d347420c33] Cleaned up temporary frame directory: temp_videos/fe4d9541-064c-4c8d-bb08-c1d347420c33 +2025-08-20 23:29:43 - INFO - [e5947bb6-739e-4bc1-bbe4-9ab58dc731d6] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_008.mp4' +2025-08-20 23:29:43 - INFO - [e5947bb6-739e-4bc1-bbe4-9ab58dc731d6] Video saved to temporary file: temp_videos/e5947bb6-739e-4bc1-bbe4-9ab58dc731d6.mp4 +2025-08-20 23:29:43 - INFO - [e5947bb6-739e-4bc1-bbe4-9ab58dc731d6] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:29:48 - INFO - [e5947bb6-739e-4bc1-bbe4-9ab58dc731d6] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:29:48 - INFO - [e5947bb6-739e-4bc1-bbe4-9ab58dc731d6] 30 frames saved to temp_videos/e5947bb6-739e-4bc1-bbe4-9ab58dc731d6 +2025-08-20 23:29:49 - INFO - Prompt token length: 3604 +2025-08-20 23:30:08 - INFO - Tokens per second: 42.780439620939916, Peak GPU memory MB: 9378.375 +2025-08-20 23:30:08 - INFO - [e5947bb6-739e-4bc1-bbe4-9ab58dc731d6] Inference time: 24.21 seconds, CPU usage: 41.1%, CPU core utilization: [62.1, 38.6, 41.8, 21.9] +2025-08-20 23:30:08 - INFO - [e5947bb6-739e-4bc1-bbe4-9ab58dc731d6] Cleaned up temporary frame directory: temp_videos/e5947bb6-739e-4bc1-bbe4-9ab58dc731d6 +2025-08-20 23:30:08 - INFO - [b8e6aa49-574c-447b-b3ef-fd02c306e746] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_009.mp4' +2025-08-20 23:30:08 - INFO - [b8e6aa49-574c-447b-b3ef-fd02c306e746] Video saved to temporary file: temp_videos/b8e6aa49-574c-447b-b3ef-fd02c306e746.mp4 +2025-08-20 23:30:08 - INFO - [b8e6aa49-574c-447b-b3ef-fd02c306e746] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:30:13 - INFO - [b8e6aa49-574c-447b-b3ef-fd02c306e746] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:30:13 - INFO - [b8e6aa49-574c-447b-b3ef-fd02c306e746] 30 frames saved to temp_videos/b8e6aa49-574c-447b-b3ef-fd02c306e746 +2025-08-20 23:30:13 - INFO - Prompt token length: 3604 +2025-08-20 23:30:33 - INFO - Tokens per second: 43.12406437638801, Peak GPU memory MB: 9378.375 +2025-08-20 23:30:33 - INFO - [b8e6aa49-574c-447b-b3ef-fd02c306e746] Inference time: 25.30 seconds, CPU usage: 38.6%, CPU core utilization: [18.8, 47.6, 19.3, 68.7] +2025-08-20 23:30:33 - INFO - [b8e6aa49-574c-447b-b3ef-fd02c306e746] Cleaned up temporary frame directory: temp_videos/b8e6aa49-574c-447b-b3ef-fd02c306e746 +2025-08-20 23:32:24 - INFO - [f04edbef-149d-425b-8805-113e2ea54029] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_001.mp4' +2025-08-20 23:32:24 - INFO - [f04edbef-149d-425b-8805-113e2ea54029] Video saved to temporary file: temp_videos/f04edbef-149d-425b-8805-113e2ea54029.mp4 +2025-08-20 23:32:24 - INFO - [f04edbef-149d-425b-8805-113e2ea54029] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:32:29 - INFO - [f04edbef-149d-425b-8805-113e2ea54029] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:32:29 - INFO - [f04edbef-149d-425b-8805-113e2ea54029] 30 frames saved to temp_videos/f04edbef-149d-425b-8805-113e2ea54029 +2025-08-20 23:32:30 - INFO - Prompt token length: 3613 +2025-08-20 23:32:46 - INFO - Tokens per second: 43.735840716197266, Peak GPU memory MB: 9378.375 +2025-08-20 23:32:46 - INFO - [f04edbef-149d-425b-8805-113e2ea54029] Inference time: 22.10 seconds, CPU usage: 8.4%, CPU core utilization: [9.6, 5.9, 10.7, 7.5] +2025-08-20 23:32:46 - INFO - [f04edbef-149d-425b-8805-113e2ea54029] Cleaned up temporary frame directory: temp_videos/f04edbef-149d-425b-8805-113e2ea54029 +2025-08-20 23:32:46 - INFO - [a0f9ff7d-76c8-42a2-ac7f-8064d56ae6f2] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_002.mp4' +2025-08-20 23:32:46 - INFO - [a0f9ff7d-76c8-42a2-ac7f-8064d56ae6f2] Video saved to temporary file: temp_videos/a0f9ff7d-76c8-42a2-ac7f-8064d56ae6f2.mp4 +2025-08-20 23:32:46 - INFO - [a0f9ff7d-76c8-42a2-ac7f-8064d56ae6f2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:32:51 - INFO - [a0f9ff7d-76c8-42a2-ac7f-8064d56ae6f2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:32:51 - INFO - [a0f9ff7d-76c8-42a2-ac7f-8064d56ae6f2] 30 frames saved to temp_videos/a0f9ff7d-76c8-42a2-ac7f-8064d56ae6f2 +2025-08-20 23:32:52 - INFO - Prompt token length: 3613 +2025-08-20 23:33:09 - INFO - Tokens per second: 43.61585189482321, Peak GPU memory MB: 9378.375 +2025-08-20 23:33:09 - INFO - [a0f9ff7d-76c8-42a2-ac7f-8064d56ae6f2] Inference time: 22.20 seconds, CPU usage: 41.2%, CPU core utilization: [40.5, 22.9, 75.8, 25.4] +2025-08-20 23:33:09 - INFO - [a0f9ff7d-76c8-42a2-ac7f-8064d56ae6f2] Cleaned up temporary frame directory: temp_videos/a0f9ff7d-76c8-42a2-ac7f-8064d56ae6f2 +2025-08-20 23:33:09 - INFO - [2f221dc2-a54d-4d32-8900-2b5c046cebaf] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_003.mp4' +2025-08-20 23:33:09 - INFO - [2f221dc2-a54d-4d32-8900-2b5c046cebaf] Video saved to temporary file: temp_videos/2f221dc2-a54d-4d32-8900-2b5c046cebaf.mp4 +2025-08-20 23:33:09 - INFO - [2f221dc2-a54d-4d32-8900-2b5c046cebaf] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:33:14 - INFO - [2f221dc2-a54d-4d32-8900-2b5c046cebaf] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:33:14 - INFO - [2f221dc2-a54d-4d32-8900-2b5c046cebaf] 30 frames saved to temp_videos/2f221dc2-a54d-4d32-8900-2b5c046cebaf +2025-08-20 23:33:14 - INFO - Prompt token length: 3613 +2025-08-20 23:33:31 - INFO - Tokens per second: 43.7656086803256, Peak GPU memory MB: 9378.375 +2025-08-20 23:33:31 - INFO - [2f221dc2-a54d-4d32-8900-2b5c046cebaf] Inference time: 21.94 seconds, CPU usage: 40.5%, CPU core utilization: [37.9, 46.5, 22.3, 55.2] +2025-08-20 23:33:31 - INFO - [2f221dc2-a54d-4d32-8900-2b5c046cebaf] Cleaned up temporary frame directory: temp_videos/2f221dc2-a54d-4d32-8900-2b5c046cebaf +2025-08-20 23:33:31 - INFO - [a0b144b5-c0eb-4c16-8826-7047eed0dbed] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_004.mp4' +2025-08-20 23:33:31 - INFO - [a0b144b5-c0eb-4c16-8826-7047eed0dbed] Video saved to temporary file: temp_videos/a0b144b5-c0eb-4c16-8826-7047eed0dbed.mp4 +2025-08-20 23:33:31 - INFO - [a0b144b5-c0eb-4c16-8826-7047eed0dbed] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:33:35 - INFO - [a0b144b5-c0eb-4c16-8826-7047eed0dbed] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:33:35 - INFO - [a0b144b5-c0eb-4c16-8826-7047eed0dbed] 30 frames saved to temp_videos/a0b144b5-c0eb-4c16-8826-7047eed0dbed +2025-08-20 23:33:36 - INFO - Prompt token length: 3613 +2025-08-20 23:33:52 - INFO - Tokens per second: 43.67951928934803, Peak GPU memory MB: 9378.375 +2025-08-20 23:33:52 - INFO - [a0b144b5-c0eb-4c16-8826-7047eed0dbed] Inference time: 21.80 seconds, CPU usage: 40.2%, CPU core utilization: [67.0, 20.9, 50.2, 22.5] +2025-08-20 23:33:52 - INFO - [a0b144b5-c0eb-4c16-8826-7047eed0dbed] Cleaned up temporary frame directory: temp_videos/a0b144b5-c0eb-4c16-8826-7047eed0dbed +2025-08-20 23:33:52 - INFO - [526b7643-3bfd-4e06-91c3-b91651e42819] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_005.mp4' +2025-08-20 23:33:52 - INFO - [526b7643-3bfd-4e06-91c3-b91651e42819] Video saved to temporary file: temp_videos/526b7643-3bfd-4e06-91c3-b91651e42819.mp4 +2025-08-20 23:33:52 - INFO - [526b7643-3bfd-4e06-91c3-b91651e42819] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:33:57 - INFO - [526b7643-3bfd-4e06-91c3-b91651e42819] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:33:57 - INFO - [526b7643-3bfd-4e06-91c3-b91651e42819] 30 frames saved to temp_videos/526b7643-3bfd-4e06-91c3-b91651e42819 +2025-08-20 23:33:58 - INFO - Prompt token length: 3613 +2025-08-20 23:34:15 - INFO - Tokens per second: 43.287042978207154, Peak GPU memory MB: 9378.375 +2025-08-20 23:34:15 - INFO - [526b7643-3bfd-4e06-91c3-b91651e42819] Inference time: 22.61 seconds, CPU usage: 40.3%, CPU core utilization: [21.9, 27.2, 21.6, 90.4] +2025-08-20 23:34:15 - INFO - [526b7643-3bfd-4e06-91c3-b91651e42819] Cleaned up temporary frame directory: temp_videos/526b7643-3bfd-4e06-91c3-b91651e42819 diff --git a/API_Transformers/logs/MiniCPM-V-4/20250819_004631.log b/API_Transformers/logs/MiniCPM-V-4/20250819_004631.log new file mode 100644 index 0000000000000000000000000000000000000000..7245b87443207035f19bcba1e2916b407d0d1cb0 --- /dev/null +++ b/API_Transformers/logs/MiniCPM-V-4/20250819_004631.log @@ -0,0 +1,14 @@ +2025-08-19 00:46:31 - INFO - Loading model: openbmb/MiniCPM-V-4 +2025-08-19 00:46:31 - INFO - vision_config is None, using default vision config +2025-08-19 00:47:35 - INFO - Model loaded in 64.26 seconds +2025-08-19 00:47:35 - INFO - GPU Memory Usage after model load: 7802.99 MB +2025-08-19 00:48:00 - INFO - [be95cc0f-0dca-41c0-a89b-822971620a94] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-19 00:48:00 - INFO - [be95cc0f-0dca-41c0-a89b-822971620a94] Video saved to temporary file: temp_videos/be95cc0f-0dca-41c0-a89b-822971620a94.mp4 +2025-08-19 00:48:00 - INFO - [be95cc0f-0dca-41c0-a89b-822971620a94] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:48:02 - INFO - [be95cc0f-0dca-41c0-a89b-822971620a94] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:48:02 - INFO - [be95cc0f-0dca-41c0-a89b-822971620a94] 30 frames saved to temp_videos/be95cc0f-0dca-41c0-a89b-822971620a94 +2025-08-19 00:48:20 - INFO - vision_config is None, using default vision config +2025-08-19 00:48:40 - INFO - Tokens per second: 9.330762006151874, Peak GPU memory MB: 11824.375 +2025-08-19 00:48:40 - INFO - [be95cc0f-0dca-41c0-a89b-822971620a94] Inference time: 40.34 seconds, CPU usage: 32.3%, CPU core utilization: [30.8, 32.9, 36.8, 28.6] +2025-08-19 00:48:40 - INFO - [be95cc0f-0dca-41c0-a89b-822971620a94] Cleaned up temporary file: temp_videos/be95cc0f-0dca-41c0-a89b-822971620a94.mp4 +2025-08-19 00:48:40 - INFO - [be95cc0f-0dca-41c0-a89b-822971620a94] Cleaned up temporary frame directory: temp_videos/be95cc0f-0dca-41c0-a89b-822971620a94 diff --git a/API_Transformers/logs/MiniCPM-V-4/20250819_013451.log b/API_Transformers/logs/MiniCPM-V-4/20250819_013451.log new file mode 100644 index 0000000000000000000000000000000000000000..b7b6f45ff215d9d3e8a6bc56abaa2a010b168ba2 --- /dev/null +++ b/API_Transformers/logs/MiniCPM-V-4/20250819_013451.log @@ -0,0 +1,454 @@ +2025-08-19 01:34:51 - INFO - Loading model: openbmb/MiniCPM-V-4 +2025-08-19 01:34:51 - INFO - vision_config is None, using default vision config +2025-08-19 01:35:55 - INFO - Model loaded in 64.47 seconds +2025-08-19 01:35:55 - INFO - GPU Memory Usage after model load: 7802.99 MB +2025-08-19 01:36:23 - INFO - [5f513c59-eb71-4e8a-82d4-e0872d45ebdd] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-19 01:36:23 - INFO - [5f513c59-eb71-4e8a-82d4-e0872d45ebdd] Video saved to temporary file: temp_videos/5f513c59-eb71-4e8a-82d4-e0872d45ebdd.mp4 +2025-08-19 01:36:23 - INFO - [5f513c59-eb71-4e8a-82d4-e0872d45ebdd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:36:26 - INFO - [5f513c59-eb71-4e8a-82d4-e0872d45ebdd] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:36:26 - INFO - [5f513c59-eb71-4e8a-82d4-e0872d45ebdd] 30 frames saved to temp_videos/5f513c59-eb71-4e8a-82d4-e0872d45ebdd +2025-08-19 01:36:43 - INFO - vision_config is None, using default vision config +2025-08-19 01:37:11 - INFO - Tokens per second: 10.464318161039833, Peak GPU memory MB: 11824.375 +2025-08-19 01:37:11 - INFO - [5f513c59-eb71-4e8a-82d4-e0872d45ebdd] Inference time: 47.79 seconds, CPU usage: 24.3%, CPU core utilization: [22.4, 30.4, 23.4, 21.0] +2025-08-19 01:37:11 - INFO - [5f513c59-eb71-4e8a-82d4-e0872d45ebdd] Cleaned up temporary file: temp_videos/5f513c59-eb71-4e8a-82d4-e0872d45ebdd.mp4 +2025-08-19 01:37:11 - INFO - [5f513c59-eb71-4e8a-82d4-e0872d45ebdd] Cleaned up temporary frame directory: temp_videos/5f513c59-eb71-4e8a-82d4-e0872d45ebdd +2025-08-19 01:37:11 - INFO - [fa0ae957-fdb5-40cc-95dd-32ca84f8be61] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_002.mp4' +2025-08-19 01:37:11 - INFO - [fa0ae957-fdb5-40cc-95dd-32ca84f8be61] Video saved to temporary file: temp_videos/fa0ae957-fdb5-40cc-95dd-32ca84f8be61.mp4 +2025-08-19 01:37:11 - INFO - [fa0ae957-fdb5-40cc-95dd-32ca84f8be61] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:37:17 - INFO - [fa0ae957-fdb5-40cc-95dd-32ca84f8be61] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:37:17 - INFO - [fa0ae957-fdb5-40cc-95dd-32ca84f8be61] 30 frames saved to temp_videos/fa0ae957-fdb5-40cc-95dd-32ca84f8be61 +2025-08-19 01:37:30 - INFO - vision_config is None, using default vision config +2025-08-19 01:38:05 - INFO - Tokens per second: 11.217051883159877, Peak GPU memory MB: 11824.375 +2025-08-19 01:38:05 - INFO - [fa0ae957-fdb5-40cc-95dd-32ca84f8be61] Inference time: 53.80 seconds, CPU usage: 37.5%, CPU core utilization: [37.0, 34.8, 51.4, 26.6] +2025-08-19 01:38:05 - INFO - [fa0ae957-fdb5-40cc-95dd-32ca84f8be61] Cleaned up temporary file: temp_videos/fa0ae957-fdb5-40cc-95dd-32ca84f8be61.mp4 +2025-08-19 01:38:05 - INFO - [fa0ae957-fdb5-40cc-95dd-32ca84f8be61] Cleaned up temporary frame directory: temp_videos/fa0ae957-fdb5-40cc-95dd-32ca84f8be61 +2025-08-19 01:38:05 - INFO - [b55d97c9-7bdb-4950-b049-3d72db40e001] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_003.mp4' +2025-08-19 01:38:05 - INFO - [b55d97c9-7bdb-4950-b049-3d72db40e001] Video saved to temporary file: temp_videos/b55d97c9-7bdb-4950-b049-3d72db40e001.mp4 +2025-08-19 01:38:05 - INFO - [b55d97c9-7bdb-4950-b049-3d72db40e001] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:38:10 - INFO - [b55d97c9-7bdb-4950-b049-3d72db40e001] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:38:10 - INFO - [b55d97c9-7bdb-4950-b049-3d72db40e001] 30 frames saved to temp_videos/b55d97c9-7bdb-4950-b049-3d72db40e001 +2025-08-19 01:38:23 - INFO - vision_config is None, using default vision config +2025-08-19 01:38:47 - INFO - Tokens per second: 10.186586825003255, Peak GPU memory MB: 11824.375 +2025-08-19 01:38:47 - INFO - [b55d97c9-7bdb-4950-b049-3d72db40e001] Inference time: 42.50 seconds, CPU usage: 38.7%, CPU core utilization: [19.7, 39.0, 22.6, 73.4] +2025-08-19 01:38:47 - INFO - [b55d97c9-7bdb-4950-b049-3d72db40e001] Cleaned up temporary file: temp_videos/b55d97c9-7bdb-4950-b049-3d72db40e001.mp4 +2025-08-19 01:38:47 - INFO - [b55d97c9-7bdb-4950-b049-3d72db40e001] Cleaned up temporary frame directory: temp_videos/b55d97c9-7bdb-4950-b049-3d72db40e001 +2025-08-19 01:38:48 - INFO - [67e694e2-b806-4fed-a8da-5238bdc3deba] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_004.mp4' +2025-08-19 01:38:48 - INFO - [67e694e2-b806-4fed-a8da-5238bdc3deba] Video saved to temporary file: temp_videos/67e694e2-b806-4fed-a8da-5238bdc3deba.mp4 +2025-08-19 01:38:48 - INFO - [67e694e2-b806-4fed-a8da-5238bdc3deba] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:38:53 - INFO - [67e694e2-b806-4fed-a8da-5238bdc3deba] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:38:53 - INFO - [67e694e2-b806-4fed-a8da-5238bdc3deba] 30 frames saved to temp_videos/67e694e2-b806-4fed-a8da-5238bdc3deba +2025-08-19 01:39:06 - INFO - vision_config is None, using default vision config +2025-08-19 01:39:32 - INFO - Tokens per second: 10.441963736015857, Peak GPU memory MB: 11824.375 +2025-08-19 01:39:32 - INFO - [67e694e2-b806-4fed-a8da-5238bdc3deba] Inference time: 44.20 seconds, CPU usage: 37.8%, CPU core utilization: [45.7, 33.2, 52.3, 20.1] +2025-08-19 01:39:32 - INFO - [67e694e2-b806-4fed-a8da-5238bdc3deba] Cleaned up temporary file: temp_videos/67e694e2-b806-4fed-a8da-5238bdc3deba.mp4 +2025-08-19 01:39:32 - INFO - [67e694e2-b806-4fed-a8da-5238bdc3deba] Cleaned up temporary frame directory: temp_videos/67e694e2-b806-4fed-a8da-5238bdc3deba +2025-08-19 01:39:32 - INFO - [e48d00ae-e1ad-4f14-82d3-b63013965879] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_005.mp4' +2025-08-19 01:39:32 - INFO - [e48d00ae-e1ad-4f14-82d3-b63013965879] Video saved to temporary file: temp_videos/e48d00ae-e1ad-4f14-82d3-b63013965879.mp4 +2025-08-19 01:39:32 - INFO - [e48d00ae-e1ad-4f14-82d3-b63013965879] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:39:37 - INFO - [e48d00ae-e1ad-4f14-82d3-b63013965879] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:39:37 - INFO - [e48d00ae-e1ad-4f14-82d3-b63013965879] 30 frames saved to temp_videos/e48d00ae-e1ad-4f14-82d3-b63013965879 +2025-08-19 01:39:50 - INFO - vision_config is None, using default vision config +2025-08-19 01:40:24 - INFO - Tokens per second: 11.117427002797763, Peak GPU memory MB: 11824.375 +2025-08-19 01:40:24 - INFO - [e48d00ae-e1ad-4f14-82d3-b63013965879] Inference time: 52.02 seconds, CPU usage: 37.7%, CPU core utilization: [22.2, 47.8, 42.5, 38.0] +2025-08-19 01:40:24 - INFO - [e48d00ae-e1ad-4f14-82d3-b63013965879] Cleaned up temporary file: temp_videos/e48d00ae-e1ad-4f14-82d3-b63013965879.mp4 +2025-08-19 01:40:24 - INFO - [e48d00ae-e1ad-4f14-82d3-b63013965879] Cleaned up temporary frame directory: temp_videos/e48d00ae-e1ad-4f14-82d3-b63013965879 +2025-08-19 01:40:24 - INFO - [a1477422-c3f9-4646-aa38-ab9853d12940] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_006.mp4' +2025-08-19 01:40:24 - INFO - [a1477422-c3f9-4646-aa38-ab9853d12940] Video saved to temporary file: temp_videos/a1477422-c3f9-4646-aa38-ab9853d12940.mp4 +2025-08-19 01:40:24 - INFO - [a1477422-c3f9-4646-aa38-ab9853d12940] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:40:29 - INFO - [a1477422-c3f9-4646-aa38-ab9853d12940] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:40:29 - INFO - [a1477422-c3f9-4646-aa38-ab9853d12940] 30 frames saved to temp_videos/a1477422-c3f9-4646-aa38-ab9853d12940 +2025-08-19 01:40:42 - INFO - vision_config is None, using default vision config +2025-08-19 01:41:12 - INFO - Tokens per second: 10.739343508452546, Peak GPU memory MB: 11824.375 +2025-08-19 01:41:12 - INFO - [a1477422-c3f9-4646-aa38-ab9853d12940] Inference time: 47.88 seconds, CPU usage: 49.1%, CPU core utilization: [40.6, 44.5, 64.1, 47.1] +2025-08-19 01:41:12 - INFO - [a1477422-c3f9-4646-aa38-ab9853d12940] Cleaned up temporary file: temp_videos/a1477422-c3f9-4646-aa38-ab9853d12940.mp4 +2025-08-19 01:41:12 - INFO - [a1477422-c3f9-4646-aa38-ab9853d12940] Cleaned up temporary frame directory: temp_videos/a1477422-c3f9-4646-aa38-ab9853d12940 +2025-08-19 01:41:12 - INFO - [f193d31f-d522-4a86-b522-2a418a14e805] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_007.mp4' +2025-08-19 01:41:12 - INFO - [f193d31f-d522-4a86-b522-2a418a14e805] Video saved to temporary file: temp_videos/f193d31f-d522-4a86-b522-2a418a14e805.mp4 +2025-08-19 01:41:12 - INFO - [f193d31f-d522-4a86-b522-2a418a14e805] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:41:20 - INFO - [f193d31f-d522-4a86-b522-2a418a14e805] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:41:20 - INFO - [f193d31f-d522-4a86-b522-2a418a14e805] 30 frames saved to temp_videos/f193d31f-d522-4a86-b522-2a418a14e805 +2025-08-19 01:41:33 - INFO - vision_config is None, using default vision config +2025-08-19 01:41:59 - INFO - Tokens per second: 10.35898493058187, Peak GPU memory MB: 11824.375 +2025-08-19 01:41:59 - INFO - [f193d31f-d522-4a86-b522-2a418a14e805] Inference time: 47.03 seconds, CPU usage: 45.1%, CPU core utilization: [40.8, 41.1, 35.0, 63.3] +2025-08-19 01:41:59 - INFO - [f193d31f-d522-4a86-b522-2a418a14e805] Cleaned up temporary file: temp_videos/f193d31f-d522-4a86-b522-2a418a14e805.mp4 +2025-08-19 01:41:59 - INFO - [f193d31f-d522-4a86-b522-2a418a14e805] Cleaned up temporary frame directory: temp_videos/f193d31f-d522-4a86-b522-2a418a14e805 +2025-08-19 01:41:59 - INFO - [91650f5d-a4b0-4697-a296-99c3924d4e4e] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_008.mp4' +2025-08-19 01:41:59 - INFO - [91650f5d-a4b0-4697-a296-99c3924d4e4e] Video saved to temporary file: temp_videos/91650f5d-a4b0-4697-a296-99c3924d4e4e.mp4 +2025-08-19 01:41:59 - INFO - [91650f5d-a4b0-4697-a296-99c3924d4e4e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:42:04 - INFO - [91650f5d-a4b0-4697-a296-99c3924d4e4e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:42:04 - INFO - [91650f5d-a4b0-4697-a296-99c3924d4e4e] 30 frames saved to temp_videos/91650f5d-a4b0-4697-a296-99c3924d4e4e +2025-08-19 01:42:17 - INFO - vision_config is None, using default vision config +2025-08-19 01:42:42 - INFO - Tokens per second: 10.245622255156498, Peak GPU memory MB: 11824.375 +2025-08-19 01:42:42 - INFO - [91650f5d-a4b0-4697-a296-99c3924d4e4e] Inference time: 42.95 seconds, CPU usage: 37.7%, CPU core utilization: [19.0, 25.2, 47.2, 59.2] +2025-08-19 01:42:42 - INFO - [91650f5d-a4b0-4697-a296-99c3924d4e4e] Cleaned up temporary file: temp_videos/91650f5d-a4b0-4697-a296-99c3924d4e4e.mp4 +2025-08-19 01:42:42 - INFO - [91650f5d-a4b0-4697-a296-99c3924d4e4e] Cleaned up temporary frame directory: temp_videos/91650f5d-a4b0-4697-a296-99c3924d4e4e +2025-08-19 01:42:42 - INFO - [62d9d4de-0e76-48c3-aadc-bd1d6ff2a7b6] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_009.mp4' +2025-08-19 01:42:42 - INFO - [62d9d4de-0e76-48c3-aadc-bd1d6ff2a7b6] Video saved to temporary file: temp_videos/62d9d4de-0e76-48c3-aadc-bd1d6ff2a7b6.mp4 +2025-08-19 01:42:42 - INFO - [62d9d4de-0e76-48c3-aadc-bd1d6ff2a7b6] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:42:46 - INFO - [62d9d4de-0e76-48c3-aadc-bd1d6ff2a7b6] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:42:46 - INFO - [62d9d4de-0e76-48c3-aadc-bd1d6ff2a7b6] 30 frames saved to temp_videos/62d9d4de-0e76-48c3-aadc-bd1d6ff2a7b6 +2025-08-19 01:42:59 - INFO - vision_config is None, using default vision config +2025-08-19 01:43:20 - INFO - Tokens per second: 9.668944653220052, Peak GPU memory MB: 11824.375 +2025-08-19 01:43:20 - INFO - [62d9d4de-0e76-48c3-aadc-bd1d6ff2a7b6] Inference time: 38.62 seconds, CPU usage: 37.4%, CPU core utilization: [29.2, 59.9, 33.9, 26.4] +2025-08-19 01:43:20 - INFO - [62d9d4de-0e76-48c3-aadc-bd1d6ff2a7b6] Cleaned up temporary file: temp_videos/62d9d4de-0e76-48c3-aadc-bd1d6ff2a7b6.mp4 +2025-08-19 01:43:20 - INFO - [62d9d4de-0e76-48c3-aadc-bd1d6ff2a7b6] Cleaned up temporary frame directory: temp_videos/62d9d4de-0e76-48c3-aadc-bd1d6ff2a7b6 +2025-08-19 01:43:20 - INFO - [28b04049-5753-4381-96f8-648268642404] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_010.mp4' +2025-08-19 01:43:20 - INFO - [28b04049-5753-4381-96f8-648268642404] Video saved to temporary file: temp_videos/28b04049-5753-4381-96f8-648268642404.mp4 +2025-08-19 01:43:20 - INFO - [28b04049-5753-4381-96f8-648268642404] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:43:26 - INFO - [28b04049-5753-4381-96f8-648268642404] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:43:26 - INFO - [28b04049-5753-4381-96f8-648268642404] 30 frames saved to temp_videos/28b04049-5753-4381-96f8-648268642404 +2025-08-19 01:43:39 - INFO - vision_config is None, using default vision config +2025-08-19 01:44:07 - INFO - Tokens per second: 10.571052792003025, Peak GPU memory MB: 11824.375 +2025-08-19 01:44:07 - INFO - [28b04049-5753-4381-96f8-648268642404] Inference time: 46.56 seconds, CPU usage: 58.2%, CPU core utilization: [47.8, 64.3, 48.6, 72.1] +2025-08-19 01:44:07 - INFO - [28b04049-5753-4381-96f8-648268642404] Cleaned up temporary file: temp_videos/28b04049-5753-4381-96f8-648268642404.mp4 +2025-08-19 01:44:07 - INFO - [28b04049-5753-4381-96f8-648268642404] Cleaned up temporary frame directory: temp_videos/28b04049-5753-4381-96f8-648268642404 +2025-08-19 01:44:07 - INFO - [b6c7b63d-5909-4e4d-a822-f476e3891ec5] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_011.mp4' +2025-08-19 01:44:07 - INFO - [b6c7b63d-5909-4e4d-a822-f476e3891ec5] Video saved to temporary file: temp_videos/b6c7b63d-5909-4e4d-a822-f476e3891ec5.mp4 +2025-08-19 01:44:07 - INFO - [b6c7b63d-5909-4e4d-a822-f476e3891ec5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:44:13 - INFO - [b6c7b63d-5909-4e4d-a822-f476e3891ec5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:44:13 - INFO - [b6c7b63d-5909-4e4d-a822-f476e3891ec5] 30 frames saved to temp_videos/b6c7b63d-5909-4e4d-a822-f476e3891ec5 +2025-08-19 01:44:26 - INFO - vision_config is None, using default vision config +2025-08-19 01:44:53 - INFO - Tokens per second: 10.543281182614821, Peak GPU memory MB: 11824.375 +2025-08-19 01:44:53 - INFO - [b6c7b63d-5909-4e4d-a822-f476e3891ec5] Inference time: 45.94 seconds, CPU usage: 37.9%, CPU core utilization: [35.8, 33.7, 38.4, 43.8] +2025-08-19 01:44:53 - INFO - [b6c7b63d-5909-4e4d-a822-f476e3891ec5] Cleaned up temporary file: temp_videos/b6c7b63d-5909-4e4d-a822-f476e3891ec5.mp4 +2025-08-19 01:44:53 - INFO - [b6c7b63d-5909-4e4d-a822-f476e3891ec5] Cleaned up temporary frame directory: temp_videos/b6c7b63d-5909-4e4d-a822-f476e3891ec5 +2025-08-19 01:44:53 - INFO - [c21259e8-cf6f-4def-8187-054b3e96dad1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_012.mp4' +2025-08-19 01:44:53 - INFO - [c21259e8-cf6f-4def-8187-054b3e96dad1] Video saved to temporary file: temp_videos/c21259e8-cf6f-4def-8187-054b3e96dad1.mp4 +2025-08-19 01:44:53 - INFO - [c21259e8-cf6f-4def-8187-054b3e96dad1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:44:59 - INFO - [c21259e8-cf6f-4def-8187-054b3e96dad1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:44:59 - INFO - [c21259e8-cf6f-4def-8187-054b3e96dad1] 30 frames saved to temp_videos/c21259e8-cf6f-4def-8187-054b3e96dad1 +2025-08-19 01:45:12 - INFO - vision_config is None, using default vision config +2025-08-19 01:45:42 - INFO - Tokens per second: 10.81279707721416, Peak GPU memory MB: 11824.375 +2025-08-19 01:45:42 - INFO - [c21259e8-cf6f-4def-8187-054b3e96dad1] Inference time: 48.60 seconds, CPU usage: 35.6%, CPU core utilization: [25.0, 52.5, 36.8, 28.1] +2025-08-19 01:45:42 - INFO - [c21259e8-cf6f-4def-8187-054b3e96dad1] Cleaned up temporary file: temp_videos/c21259e8-cf6f-4def-8187-054b3e96dad1.mp4 +2025-08-19 01:45:42 - INFO - [c21259e8-cf6f-4def-8187-054b3e96dad1] Cleaned up temporary frame directory: temp_videos/c21259e8-cf6f-4def-8187-054b3e96dad1 +2025-08-19 01:45:42 - INFO - [2893b5f9-ff91-49b9-a805-5cfb913513dc] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_013.mp4' +2025-08-19 01:45:42 - INFO - [2893b5f9-ff91-49b9-a805-5cfb913513dc] Video saved to temporary file: temp_videos/2893b5f9-ff91-49b9-a805-5cfb913513dc.mp4 +2025-08-19 01:45:42 - INFO - [2893b5f9-ff91-49b9-a805-5cfb913513dc] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:45:47 - INFO - [2893b5f9-ff91-49b9-a805-5cfb913513dc] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:45:47 - INFO - [2893b5f9-ff91-49b9-a805-5cfb913513dc] 30 frames saved to temp_videos/2893b5f9-ff91-49b9-a805-5cfb913513dc +2025-08-19 01:46:00 - INFO - vision_config is None, using default vision config +2025-08-19 01:46:29 - INFO - Tokens per second: 10.750196309237642, Peak GPU memory MB: 11824.375 +2025-08-19 01:46:29 - INFO - [2893b5f9-ff91-49b9-a805-5cfb913513dc] Inference time: 47.04 seconds, CPU usage: 33.7%, CPU core utilization: [22.7, 33.2, 16.0, 63.0] +2025-08-19 01:46:29 - INFO - [2893b5f9-ff91-49b9-a805-5cfb913513dc] Cleaned up temporary file: temp_videos/2893b5f9-ff91-49b9-a805-5cfb913513dc.mp4 +2025-08-19 01:46:29 - INFO - [2893b5f9-ff91-49b9-a805-5cfb913513dc] Cleaned up temporary frame directory: temp_videos/2893b5f9-ff91-49b9-a805-5cfb913513dc +2025-08-19 01:46:29 - INFO - [6a1d46d1-8d12-4277-b881-67852e9ec9fc] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_014.mp4' +2025-08-19 01:46:29 - INFO - [6a1d46d1-8d12-4277-b881-67852e9ec9fc] Video saved to temporary file: temp_videos/6a1d46d1-8d12-4277-b881-67852e9ec9fc.mp4 +2025-08-19 01:46:29 - INFO - [6a1d46d1-8d12-4277-b881-67852e9ec9fc] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:46:33 - INFO - [6a1d46d1-8d12-4277-b881-67852e9ec9fc] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:46:33 - INFO - [6a1d46d1-8d12-4277-b881-67852e9ec9fc] 30 frames saved to temp_videos/6a1d46d1-8d12-4277-b881-67852e9ec9fc +2025-08-19 01:46:46 - INFO - vision_config is None, using default vision config +2025-08-19 01:47:09 - INFO - Tokens per second: 9.95437319443051, Peak GPU memory MB: 11824.375 +2025-08-19 01:47:09 - INFO - [6a1d46d1-8d12-4277-b881-67852e9ec9fc] Inference time: 40.29 seconds, CPU usage: 33.9%, CPU core utilization: [14.9, 36.2, 35.4, 49.2] +2025-08-19 01:47:09 - INFO - [6a1d46d1-8d12-4277-b881-67852e9ec9fc] Cleaned up temporary file: temp_videos/6a1d46d1-8d12-4277-b881-67852e9ec9fc.mp4 +2025-08-19 01:47:09 - INFO - [6a1d46d1-8d12-4277-b881-67852e9ec9fc] Cleaned up temporary frame directory: temp_videos/6a1d46d1-8d12-4277-b881-67852e9ec9fc +2025-08-19 01:47:09 - INFO - [b2144dd4-543c-49c4-b47a-ac4a270fbb05] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_015.mp4' +2025-08-19 01:47:09 - INFO - [b2144dd4-543c-49c4-b47a-ac4a270fbb05] Video saved to temporary file: temp_videos/b2144dd4-543c-49c4-b47a-ac4a270fbb05.mp4 +2025-08-19 01:47:09 - INFO - [b2144dd4-543c-49c4-b47a-ac4a270fbb05] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:47:14 - INFO - [b2144dd4-543c-49c4-b47a-ac4a270fbb05] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:47:15 - INFO - [b2144dd4-543c-49c4-b47a-ac4a270fbb05] 30 frames saved to temp_videos/b2144dd4-543c-49c4-b47a-ac4a270fbb05 +2025-08-19 01:47:27 - INFO - vision_config is None, using default vision config +2025-08-19 01:47:56 - INFO - Tokens per second: 10.678644069294082, Peak GPU memory MB: 11824.375 +2025-08-19 01:47:56 - INFO - [b2144dd4-543c-49c4-b47a-ac4a270fbb05] Inference time: 46.50 seconds, CPU usage: 34.2%, CPU core utilization: [30.7, 18.2, 50.7, 37.2] +2025-08-19 01:47:56 - INFO - [b2144dd4-543c-49c4-b47a-ac4a270fbb05] Cleaned up temporary file: temp_videos/b2144dd4-543c-49c4-b47a-ac4a270fbb05.mp4 +2025-08-19 01:47:56 - INFO - [b2144dd4-543c-49c4-b47a-ac4a270fbb05] Cleaned up temporary frame directory: temp_videos/b2144dd4-543c-49c4-b47a-ac4a270fbb05 +2025-08-19 01:47:56 - INFO - [b968ad5e-f870-4d22-9878-aa9b5b0a119d] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_016.mp4' +2025-08-19 01:47:56 - INFO - [b968ad5e-f870-4d22-9878-aa9b5b0a119d] Video saved to temporary file: temp_videos/b968ad5e-f870-4d22-9878-aa9b5b0a119d.mp4 +2025-08-19 01:47:56 - INFO - [b968ad5e-f870-4d22-9878-aa9b5b0a119d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:48:00 - INFO - [b968ad5e-f870-4d22-9878-aa9b5b0a119d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:48:00 - INFO - [b968ad5e-f870-4d22-9878-aa9b5b0a119d] 30 frames saved to temp_videos/b968ad5e-f870-4d22-9878-aa9b5b0a119d +2025-08-19 01:48:13 - INFO - vision_config is None, using default vision config +2025-08-19 01:48:53 - INFO - Tokens per second: 11.436392353304559, Peak GPU memory MB: 11824.375 +2025-08-19 01:48:53 - INFO - [b968ad5e-f870-4d22-9878-aa9b5b0a119d] Inference time: 57.42 seconds, CPU usage: 31.0%, CPU core utilization: [32.8, 51.8, 16.3, 23.3] +2025-08-19 01:48:53 - INFO - [b968ad5e-f870-4d22-9878-aa9b5b0a119d] Cleaned up temporary file: temp_videos/b968ad5e-f870-4d22-9878-aa9b5b0a119d.mp4 +2025-08-19 01:48:53 - INFO - [b968ad5e-f870-4d22-9878-aa9b5b0a119d] Cleaned up temporary frame directory: temp_videos/b968ad5e-f870-4d22-9878-aa9b5b0a119d +2025-08-19 01:48:53 - INFO - [3ddab2f0-f085-4d1f-8968-7a2815622372] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_017.mp4' +2025-08-19 01:48:53 - INFO - [3ddab2f0-f085-4d1f-8968-7a2815622372] Video saved to temporary file: temp_videos/3ddab2f0-f085-4d1f-8968-7a2815622372.mp4 +2025-08-19 01:48:53 - INFO - [3ddab2f0-f085-4d1f-8968-7a2815622372] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:48:59 - INFO - [3ddab2f0-f085-4d1f-8968-7a2815622372] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:48:59 - INFO - [3ddab2f0-f085-4d1f-8968-7a2815622372] 30 frames saved to temp_videos/3ddab2f0-f085-4d1f-8968-7a2815622372 +2025-08-19 01:49:12 - INFO - vision_config is None, using default vision config +2025-08-19 01:49:43 - INFO - Tokens per second: 10.977650622627742, Peak GPU memory MB: 11824.375 +2025-08-19 01:49:43 - INFO - [3ddab2f0-f085-4d1f-8968-7a2815622372] Inference time: 50.17 seconds, CPU usage: 33.8%, CPU core utilization: [29.7, 20.5, 31.8, 53.2] +2025-08-19 01:49:43 - INFO - [3ddab2f0-f085-4d1f-8968-7a2815622372] Cleaned up temporary file: temp_videos/3ddab2f0-f085-4d1f-8968-7a2815622372.mp4 +2025-08-19 01:49:43 - INFO - [3ddab2f0-f085-4d1f-8968-7a2815622372] Cleaned up temporary frame directory: temp_videos/3ddab2f0-f085-4d1f-8968-7a2815622372 +2025-08-19 01:49:43 - INFO - [64082720-8509-4a38-a4e8-21caaaa28d68] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_018.mp4' +2025-08-19 01:49:43 - INFO - [64082720-8509-4a38-a4e8-21caaaa28d68] Video saved to temporary file: temp_videos/64082720-8509-4a38-a4e8-21caaaa28d68.mp4 +2025-08-19 01:49:43 - INFO - [64082720-8509-4a38-a4e8-21caaaa28d68] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:49:48 - INFO - [64082720-8509-4a38-a4e8-21caaaa28d68] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:49:48 - INFO - [64082720-8509-4a38-a4e8-21caaaa28d68] 30 frames saved to temp_videos/64082720-8509-4a38-a4e8-21caaaa28d68 +2025-08-19 01:50:01 - INFO - vision_config is None, using default vision config +2025-08-19 01:50:30 - INFO - Tokens per second: 10.787881404092886, Peak GPU memory MB: 11824.375 +2025-08-19 01:50:30 - INFO - [64082720-8509-4a38-a4e8-21caaaa28d68] Inference time: 46.92 seconds, CPU usage: 33.3%, CPU core utilization: [15.0, 23.0, 64.7, 30.6] +2025-08-19 01:50:30 - INFO - [64082720-8509-4a38-a4e8-21caaaa28d68] Cleaned up temporary file: temp_videos/64082720-8509-4a38-a4e8-21caaaa28d68.mp4 +2025-08-19 01:50:30 - INFO - [64082720-8509-4a38-a4e8-21caaaa28d68] Cleaned up temporary frame directory: temp_videos/64082720-8509-4a38-a4e8-21caaaa28d68 +2025-08-19 01:50:30 - INFO - [287d15c6-2c14-4a24-8d66-ea1a1c087723] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_019.mp4' +2025-08-19 01:50:30 - INFO - [287d15c6-2c14-4a24-8d66-ea1a1c087723] Video saved to temporary file: temp_videos/287d15c6-2c14-4a24-8d66-ea1a1c087723.mp4 +2025-08-19 01:50:30 - INFO - [287d15c6-2c14-4a24-8d66-ea1a1c087723] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:50:36 - INFO - [287d15c6-2c14-4a24-8d66-ea1a1c087723] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:50:36 - INFO - [287d15c6-2c14-4a24-8d66-ea1a1c087723] 30 frames saved to temp_videos/287d15c6-2c14-4a24-8d66-ea1a1c087723 +2025-08-19 01:50:49 - INFO - vision_config is None, using default vision config +2025-08-19 01:51:16 - INFO - Tokens per second: 10.528072417044749, Peak GPU memory MB: 11824.375 +2025-08-19 01:51:16 - INFO - [287d15c6-2c14-4a24-8d66-ea1a1c087723] Inference time: 45.31 seconds, CPU usage: 34.1%, CPU core utilization: [66.5, 26.9, 29.2, 13.9] +2025-08-19 01:51:16 - INFO - [287d15c6-2c14-4a24-8d66-ea1a1c087723] Cleaned up temporary file: temp_videos/287d15c6-2c14-4a24-8d66-ea1a1c087723.mp4 +2025-08-19 01:51:16 - INFO - [287d15c6-2c14-4a24-8d66-ea1a1c087723] Cleaned up temporary frame directory: temp_videos/287d15c6-2c14-4a24-8d66-ea1a1c087723 +2025-08-19 01:51:16 - INFO - [baccf9ea-be8e-4c3e-bd4d-44d91751d8a3] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_020.mp4' +2025-08-19 01:51:16 - INFO - [baccf9ea-be8e-4c3e-bd4d-44d91751d8a3] Video saved to temporary file: temp_videos/baccf9ea-be8e-4c3e-bd4d-44d91751d8a3.mp4 +2025-08-19 01:51:16 - INFO - [baccf9ea-be8e-4c3e-bd4d-44d91751d8a3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:51:21 - INFO - [baccf9ea-be8e-4c3e-bd4d-44d91751d8a3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:51:21 - INFO - [baccf9ea-be8e-4c3e-bd4d-44d91751d8a3] 30 frames saved to temp_videos/baccf9ea-be8e-4c3e-bd4d-44d91751d8a3 +2025-08-19 01:51:34 - INFO - vision_config is None, using default vision config +2025-08-19 01:51:56 - INFO - Tokens per second: 10.013143127227265, Peak GPU memory MB: 11824.375 +2025-08-19 01:51:56 - INFO - [baccf9ea-be8e-4c3e-bd4d-44d91751d8a3] Inference time: 40.79 seconds, CPU usage: 50.2%, CPU core utilization: [55.2, 35.5, 70.4, 39.7] +2025-08-19 01:51:56 - INFO - [baccf9ea-be8e-4c3e-bd4d-44d91751d8a3] Cleaned up temporary file: temp_videos/baccf9ea-be8e-4c3e-bd4d-44d91751d8a3.mp4 +2025-08-19 01:51:56 - INFO - [baccf9ea-be8e-4c3e-bd4d-44d91751d8a3] Cleaned up temporary frame directory: temp_videos/baccf9ea-be8e-4c3e-bd4d-44d91751d8a3 +2025-08-19 01:51:57 - INFO - [74d5f1c8-04e1-4be4-b8c6-ff57447ffe57] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_021.mp4' +2025-08-19 01:51:57 - INFO - [74d5f1c8-04e1-4be4-b8c6-ff57447ffe57] Video saved to temporary file: temp_videos/74d5f1c8-04e1-4be4-b8c6-ff57447ffe57.mp4 +2025-08-19 01:51:57 - INFO - [74d5f1c8-04e1-4be4-b8c6-ff57447ffe57] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:52:02 - INFO - [74d5f1c8-04e1-4be4-b8c6-ff57447ffe57] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:52:02 - INFO - [74d5f1c8-04e1-4be4-b8c6-ff57447ffe57] 30 frames saved to temp_videos/74d5f1c8-04e1-4be4-b8c6-ff57447ffe57 +2025-08-19 01:52:15 - INFO - vision_config is None, using default vision config +2025-08-19 01:52:44 - INFO - Tokens per second: 10.80753428151764, Peak GPU memory MB: 11824.375 +2025-08-19 01:52:44 - INFO - [74d5f1c8-04e1-4be4-b8c6-ff57447ffe57] Inference time: 47.44 seconds, CPU usage: 33.8%, CPU core utilization: [21.7, 32.7, 49.9, 30.7] +2025-08-19 01:52:44 - INFO - [74d5f1c8-04e1-4be4-b8c6-ff57447ffe57] Cleaned up temporary file: temp_videos/74d5f1c8-04e1-4be4-b8c6-ff57447ffe57.mp4 +2025-08-19 01:52:44 - INFO - [74d5f1c8-04e1-4be4-b8c6-ff57447ffe57] Cleaned up temporary frame directory: temp_videos/74d5f1c8-04e1-4be4-b8c6-ff57447ffe57 +2025-08-19 01:52:44 - INFO - [3fa3173d-a3e7-4543-b51b-0740cf6590fe] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_022.mp4' +2025-08-19 01:52:44 - INFO - [3fa3173d-a3e7-4543-b51b-0740cf6590fe] Video saved to temporary file: temp_videos/3fa3173d-a3e7-4543-b51b-0740cf6590fe.mp4 +2025-08-19 01:52:44 - INFO - [3fa3173d-a3e7-4543-b51b-0740cf6590fe] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:52:49 - INFO - [3fa3173d-a3e7-4543-b51b-0740cf6590fe] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:52:49 - INFO - [3fa3173d-a3e7-4543-b51b-0740cf6590fe] 30 frames saved to temp_videos/3fa3173d-a3e7-4543-b51b-0740cf6590fe +2025-08-19 01:53:02 - INFO - vision_config is None, using default vision config +2025-08-19 01:53:38 - INFO - Tokens per second: 11.271056271043205, Peak GPU memory MB: 11824.375 +2025-08-19 01:53:38 - INFO - [3fa3173d-a3e7-4543-b51b-0740cf6590fe] Inference time: 54.02 seconds, CPU usage: 32.1%, CPU core utilization: [14.2, 64.7, 38.0, 11.4] +2025-08-19 01:53:38 - INFO - [3fa3173d-a3e7-4543-b51b-0740cf6590fe] Cleaned up temporary file: temp_videos/3fa3173d-a3e7-4543-b51b-0740cf6590fe.mp4 +2025-08-19 01:53:38 - INFO - [3fa3173d-a3e7-4543-b51b-0740cf6590fe] Cleaned up temporary frame directory: temp_videos/3fa3173d-a3e7-4543-b51b-0740cf6590fe +2025-08-19 01:53:38 - INFO - [f2c7c4ac-514a-4ba2-acaa-363c9f2e16c1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_023.mp4' +2025-08-19 01:53:38 - INFO - [f2c7c4ac-514a-4ba2-acaa-363c9f2e16c1] Video saved to temporary file: temp_videos/f2c7c4ac-514a-4ba2-acaa-363c9f2e16c1.mp4 +2025-08-19 01:53:38 - INFO - [f2c7c4ac-514a-4ba2-acaa-363c9f2e16c1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:53:43 - INFO - [f2c7c4ac-514a-4ba2-acaa-363c9f2e16c1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:53:43 - INFO - [f2c7c4ac-514a-4ba2-acaa-363c9f2e16c1] 30 frames saved to temp_videos/f2c7c4ac-514a-4ba2-acaa-363c9f2e16c1 +2025-08-19 01:53:56 - INFO - vision_config is None, using default vision config +2025-08-19 01:54:23 - INFO - Tokens per second: 10.584057777083409, Peak GPU memory MB: 11824.375 +2025-08-19 01:54:23 - INFO - [f2c7c4ac-514a-4ba2-acaa-363c9f2e16c1] Inference time: 45.30 seconds, CPU usage: 33.8%, CPU core utilization: [27.8, 11.4, 55.2, 40.8] +2025-08-19 01:54:23 - INFO - [f2c7c4ac-514a-4ba2-acaa-363c9f2e16c1] Cleaned up temporary file: temp_videos/f2c7c4ac-514a-4ba2-acaa-363c9f2e16c1.mp4 +2025-08-19 01:54:23 - INFO - [f2c7c4ac-514a-4ba2-acaa-363c9f2e16c1] Cleaned up temporary frame directory: temp_videos/f2c7c4ac-514a-4ba2-acaa-363c9f2e16c1 +2025-08-19 01:54:23 - INFO - [49e141c3-89cb-4d64-8e6c-fe5f5be26dc0] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_024.mp4' +2025-08-19 01:54:23 - INFO - [49e141c3-89cb-4d64-8e6c-fe5f5be26dc0] Video saved to temporary file: temp_videos/49e141c3-89cb-4d64-8e6c-fe5f5be26dc0.mp4 +2025-08-19 01:54:23 - INFO - [49e141c3-89cb-4d64-8e6c-fe5f5be26dc0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:54:28 - INFO - [49e141c3-89cb-4d64-8e6c-fe5f5be26dc0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:54:28 - INFO - [49e141c3-89cb-4d64-8e6c-fe5f5be26dc0] 30 frames saved to temp_videos/49e141c3-89cb-4d64-8e6c-fe5f5be26dc0 +2025-08-19 01:54:41 - INFO - vision_config is None, using default vision config +2025-08-19 01:55:07 - INFO - Tokens per second: 10.459799750467855, Peak GPU memory MB: 11824.375 +2025-08-19 01:55:07 - INFO - [49e141c3-89cb-4d64-8e6c-fe5f5be26dc0] Inference time: 43.99 seconds, CPU usage: 33.8%, CPU core utilization: [22.6, 28.1, 58.6, 25.9] +2025-08-19 01:55:07 - INFO - [49e141c3-89cb-4d64-8e6c-fe5f5be26dc0] Cleaned up temporary file: temp_videos/49e141c3-89cb-4d64-8e6c-fe5f5be26dc0.mp4 +2025-08-19 01:55:07 - INFO - [49e141c3-89cb-4d64-8e6c-fe5f5be26dc0] Cleaned up temporary frame directory: temp_videos/49e141c3-89cb-4d64-8e6c-fe5f5be26dc0 +2025-08-19 01:55:49 - INFO - [8582051e-6ca3-4710-b3fa-332e5371ab3a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-19 01:55:49 - INFO - [8582051e-6ca3-4710-b3fa-332e5371ab3a] Video saved to temporary file: temp_videos/8582051e-6ca3-4710-b3fa-332e5371ab3a.mp4 +2025-08-19 01:55:49 - INFO - [8582051e-6ca3-4710-b3fa-332e5371ab3a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:55:52 - INFO - [8582051e-6ca3-4710-b3fa-332e5371ab3a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:55:52 - INFO - [8582051e-6ca3-4710-b3fa-332e5371ab3a] 30 frames saved to temp_videos/8582051e-6ca3-4710-b3fa-332e5371ab3a +2025-08-19 01:56:05 - INFO - vision_config is None, using default vision config +2025-08-19 01:56:31 - INFO - Tokens per second: 10.459373498424311, Peak GPU memory MB: 11824.375 +2025-08-19 01:56:31 - INFO - [8582051e-6ca3-4710-b3fa-332e5371ab3a] Inference time: 41.89 seconds, CPU usage: 17.0%, CPU core utilization: [15.1, 18.5, 27.0, 7.5] +2025-08-19 01:56:31 - INFO - [8582051e-6ca3-4710-b3fa-332e5371ab3a] Cleaned up temporary file: temp_videos/8582051e-6ca3-4710-b3fa-332e5371ab3a.mp4 +2025-08-19 01:56:31 - INFO - [8582051e-6ca3-4710-b3fa-332e5371ab3a] Cleaned up temporary frame directory: temp_videos/8582051e-6ca3-4710-b3fa-332e5371ab3a +2025-08-19 01:56:31 - INFO - [1ea63879-2b96-4512-843c-4c2fe0b32d56] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_002.mp4' +2025-08-19 01:56:31 - INFO - [1ea63879-2b96-4512-843c-4c2fe0b32d56] Video saved to temporary file: temp_videos/1ea63879-2b96-4512-843c-4c2fe0b32d56.mp4 +2025-08-19 01:56:31 - INFO - [1ea63879-2b96-4512-843c-4c2fe0b32d56] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:56:37 - INFO - [1ea63879-2b96-4512-843c-4c2fe0b32d56] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:56:37 - INFO - [1ea63879-2b96-4512-843c-4c2fe0b32d56] 30 frames saved to temp_videos/1ea63879-2b96-4512-843c-4c2fe0b32d56 +2025-08-19 01:56:49 - INFO - vision_config is None, using default vision config +2025-08-19 01:57:16 - INFO - Tokens per second: 10.521444099900755, Peak GPU memory MB: 11824.375 +2025-08-19 01:57:16 - INFO - [1ea63879-2b96-4512-843c-4c2fe0b32d56] Inference time: 45.18 seconds, CPU usage: 34.8%, CPU core utilization: [27.2, 30.1, 57.7, 24.0] +2025-08-19 01:57:16 - INFO - [1ea63879-2b96-4512-843c-4c2fe0b32d56] Cleaned up temporary file: temp_videos/1ea63879-2b96-4512-843c-4c2fe0b32d56.mp4 +2025-08-19 01:57:16 - INFO - [1ea63879-2b96-4512-843c-4c2fe0b32d56] Cleaned up temporary frame directory: temp_videos/1ea63879-2b96-4512-843c-4c2fe0b32d56 +2025-08-19 01:57:16 - INFO - [f5145952-2d58-4e30-8ed0-e883cb9c33d2] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_003.mp4' +2025-08-19 01:57:16 - INFO - [f5145952-2d58-4e30-8ed0-e883cb9c33d2] Video saved to temporary file: temp_videos/f5145952-2d58-4e30-8ed0-e883cb9c33d2.mp4 +2025-08-19 01:57:16 - INFO - [f5145952-2d58-4e30-8ed0-e883cb9c33d2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:57:21 - INFO - [f5145952-2d58-4e30-8ed0-e883cb9c33d2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:57:21 - INFO - [f5145952-2d58-4e30-8ed0-e883cb9c33d2] 30 frames saved to temp_videos/f5145952-2d58-4e30-8ed0-e883cb9c33d2 +2025-08-19 01:57:34 - INFO - vision_config is None, using default vision config +2025-08-19 01:57:57 - INFO - Tokens per second: 10.026139095405306, Peak GPU memory MB: 11824.375 +2025-08-19 01:57:57 - INFO - [f5145952-2d58-4e30-8ed0-e883cb9c33d2] Inference time: 41.03 seconds, CPU usage: 34.5%, CPU core utilization: [49.0, 20.1, 32.4, 36.6] +2025-08-19 01:57:57 - INFO - [f5145952-2d58-4e30-8ed0-e883cb9c33d2] Cleaned up temporary file: temp_videos/f5145952-2d58-4e30-8ed0-e883cb9c33d2.mp4 +2025-08-19 01:57:57 - INFO - [f5145952-2d58-4e30-8ed0-e883cb9c33d2] Cleaned up temporary frame directory: temp_videos/f5145952-2d58-4e30-8ed0-e883cb9c33d2 +2025-08-19 01:57:57 - INFO - [e3cf41f4-2ad6-4236-ae33-3a5e359e3e12] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_004.mp4' +2025-08-19 01:57:57 - INFO - [e3cf41f4-2ad6-4236-ae33-3a5e359e3e12] Video saved to temporary file: temp_videos/e3cf41f4-2ad6-4236-ae33-3a5e359e3e12.mp4 +2025-08-19 01:57:57 - INFO - [e3cf41f4-2ad6-4236-ae33-3a5e359e3e12] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:58:02 - INFO - [e3cf41f4-2ad6-4236-ae33-3a5e359e3e12] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:58:02 - INFO - [e3cf41f4-2ad6-4236-ae33-3a5e359e3e12] 30 frames saved to temp_videos/e3cf41f4-2ad6-4236-ae33-3a5e359e3e12 +2025-08-19 01:58:15 - INFO - vision_config is None, using default vision config +2025-08-19 01:58:47 - INFO - Tokens per second: 11.01241993759931, Peak GPU memory MB: 11824.375 +2025-08-19 01:58:47 - INFO - [e3cf41f4-2ad6-4236-ae33-3a5e359e3e12] Inference time: 49.77 seconds, CPU usage: 32.8%, CPU core utilization: [26.8, 13.1, 79.1, 12.0] +2025-08-19 01:58:47 - INFO - [e3cf41f4-2ad6-4236-ae33-3a5e359e3e12] Cleaned up temporary file: temp_videos/e3cf41f4-2ad6-4236-ae33-3a5e359e3e12.mp4 +2025-08-19 01:58:47 - INFO - [e3cf41f4-2ad6-4236-ae33-3a5e359e3e12] Cleaned up temporary frame directory: temp_videos/e3cf41f4-2ad6-4236-ae33-3a5e359e3e12 +2025-08-19 01:58:47 - INFO - [503dd6a1-31f7-4858-8147-e63031a49a4d] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_005.mp4' +2025-08-19 01:58:47 - INFO - [503dd6a1-31f7-4858-8147-e63031a49a4d] Video saved to temporary file: temp_videos/503dd6a1-31f7-4858-8147-e63031a49a4d.mp4 +2025-08-19 01:58:47 - INFO - [503dd6a1-31f7-4858-8147-e63031a49a4d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:58:52 - INFO - [503dd6a1-31f7-4858-8147-e63031a49a4d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:58:52 - INFO - [503dd6a1-31f7-4858-8147-e63031a49a4d] 30 frames saved to temp_videos/503dd6a1-31f7-4858-8147-e63031a49a4d +2025-08-19 01:59:05 - INFO - vision_config is None, using default vision config +2025-08-19 01:59:32 - INFO - Tokens per second: 10.591706857753069, Peak GPU memory MB: 11824.375 +2025-08-19 01:59:32 - INFO - [503dd6a1-31f7-4858-8147-e63031a49a4d] Inference time: 45.01 seconds, CPU usage: 34.2%, CPU core utilization: [40.7, 15.3, 26.2, 54.4] +2025-08-19 01:59:32 - INFO - [503dd6a1-31f7-4858-8147-e63031a49a4d] Cleaned up temporary file: temp_videos/503dd6a1-31f7-4858-8147-e63031a49a4d.mp4 +2025-08-19 01:59:32 - INFO - [503dd6a1-31f7-4858-8147-e63031a49a4d] Cleaned up temporary frame directory: temp_videos/503dd6a1-31f7-4858-8147-e63031a49a4d +2025-08-19 01:59:32 - INFO - [1d4c6c16-0d5b-4a6d-99ef-c03f51be1bda] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_006.mp4' +2025-08-19 01:59:32 - INFO - [1d4c6c16-0d5b-4a6d-99ef-c03f51be1bda] Video saved to temporary file: temp_videos/1d4c6c16-0d5b-4a6d-99ef-c03f51be1bda.mp4 +2025-08-19 01:59:32 - INFO - [1d4c6c16-0d5b-4a6d-99ef-c03f51be1bda] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:59:38 - INFO - [1d4c6c16-0d5b-4a6d-99ef-c03f51be1bda] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:59:38 - INFO - [1d4c6c16-0d5b-4a6d-99ef-c03f51be1bda] 30 frames saved to temp_videos/1d4c6c16-0d5b-4a6d-99ef-c03f51be1bda +2025-08-19 01:59:51 - INFO - vision_config is None, using default vision config +2025-08-19 02:00:22 - INFO - Tokens per second: 10.896315328259965, Peak GPU memory MB: 11824.375 +2025-08-19 02:00:22 - INFO - [1d4c6c16-0d5b-4a6d-99ef-c03f51be1bda] Inference time: 49.56 seconds, CPU usage: 34.5%, CPU core utilization: [17.4, 61.3, 13.1, 46.3] +2025-08-19 02:00:22 - INFO - [1d4c6c16-0d5b-4a6d-99ef-c03f51be1bda] Cleaned up temporary file: temp_videos/1d4c6c16-0d5b-4a6d-99ef-c03f51be1bda.mp4 +2025-08-19 02:00:22 - INFO - [1d4c6c16-0d5b-4a6d-99ef-c03f51be1bda] Cleaned up temporary frame directory: temp_videos/1d4c6c16-0d5b-4a6d-99ef-c03f51be1bda +2025-08-19 02:00:22 - INFO - [66b06d03-2766-4af8-ad8f-a6fbabe537b1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_007.mp4' +2025-08-19 02:00:22 - INFO - [66b06d03-2766-4af8-ad8f-a6fbabe537b1] Video saved to temporary file: temp_videos/66b06d03-2766-4af8-ad8f-a6fbabe537b1.mp4 +2025-08-19 02:00:22 - INFO - [66b06d03-2766-4af8-ad8f-a6fbabe537b1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 02:00:27 - INFO - [66b06d03-2766-4af8-ad8f-a6fbabe537b1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 02:00:27 - INFO - [66b06d03-2766-4af8-ad8f-a6fbabe537b1] 30 frames saved to temp_videos/66b06d03-2766-4af8-ad8f-a6fbabe537b1 +2025-08-19 02:00:40 - INFO - vision_config is None, using default vision config +2025-08-19 02:01:01 - INFO - Tokens per second: 9.590173753322563, Peak GPU memory MB: 11824.375 +2025-08-19 02:01:01 - INFO - [66b06d03-2766-4af8-ad8f-a6fbabe537b1] Inference time: 38.85 seconds, CPU usage: 35.6%, CPU core utilization: [35.1, 15.9, 76.1, 14.9] +2025-08-19 02:01:01 - INFO - [66b06d03-2766-4af8-ad8f-a6fbabe537b1] Cleaned up temporary file: temp_videos/66b06d03-2766-4af8-ad8f-a6fbabe537b1.mp4 +2025-08-19 02:01:01 - INFO - [66b06d03-2766-4af8-ad8f-a6fbabe537b1] Cleaned up temporary frame directory: temp_videos/66b06d03-2766-4af8-ad8f-a6fbabe537b1 +2025-08-19 02:01:01 - INFO - [952b41e2-c5c5-42f2-a5bb-1145e3e1fb34] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_008.mp4' +2025-08-19 02:01:01 - INFO - [952b41e2-c5c5-42f2-a5bb-1145e3e1fb34] Video saved to temporary file: temp_videos/952b41e2-c5c5-42f2-a5bb-1145e3e1fb34.mp4 +2025-08-19 02:01:01 - INFO - [952b41e2-c5c5-42f2-a5bb-1145e3e1fb34] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 02:01:06 - INFO - [952b41e2-c5c5-42f2-a5bb-1145e3e1fb34] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 02:01:06 - INFO - [952b41e2-c5c5-42f2-a5bb-1145e3e1fb34] 30 frames saved to temp_videos/952b41e2-c5c5-42f2-a5bb-1145e3e1fb34 +2025-08-19 02:01:19 - INFO - vision_config is None, using default vision config +2025-08-19 02:01:46 - INFO - Tokens per second: 10.608430309528273, Peak GPU memory MB: 11824.375 +2025-08-19 02:01:46 - INFO - [952b41e2-c5c5-42f2-a5bb-1145e3e1fb34] Inference time: 45.49 seconds, CPU usage: 34.3%, CPU core utilization: [49.6, 30.9, 40.9, 15.7] +2025-08-19 02:01:46 - INFO - [952b41e2-c5c5-42f2-a5bb-1145e3e1fb34] Cleaned up temporary file: temp_videos/952b41e2-c5c5-42f2-a5bb-1145e3e1fb34.mp4 +2025-08-19 02:01:46 - INFO - [952b41e2-c5c5-42f2-a5bb-1145e3e1fb34] Cleaned up temporary frame directory: temp_videos/952b41e2-c5c5-42f2-a5bb-1145e3e1fb34 +2025-08-19 02:01:46 - INFO - [51cad348-55db-43de-9338-3051e9131844] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_009.mp4' +2025-08-19 02:01:46 - INFO - [51cad348-55db-43de-9338-3051e9131844] Video saved to temporary file: temp_videos/51cad348-55db-43de-9338-3051e9131844.mp4 +2025-08-19 02:01:46 - INFO - [51cad348-55db-43de-9338-3051e9131844] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 02:01:51 - INFO - [51cad348-55db-43de-9338-3051e9131844] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 02:01:51 - INFO - [51cad348-55db-43de-9338-3051e9131844] 30 frames saved to temp_videos/51cad348-55db-43de-9338-3051e9131844 +2025-08-19 02:02:04 - INFO - vision_config is None, using default vision config +2025-08-19 02:02:37 - INFO - Tokens per second: 11.082226675537616, Peak GPU memory MB: 11824.375 +2025-08-19 02:02:37 - INFO - [51cad348-55db-43de-9338-3051e9131844] Inference time: 50.99 seconds, CPU usage: 33.1%, CPU core utilization: [33.1, 16.3, 62.2, 20.7] +2025-08-19 02:02:37 - INFO - [51cad348-55db-43de-9338-3051e9131844] Cleaned up temporary file: temp_videos/51cad348-55db-43de-9338-3051e9131844.mp4 +2025-08-19 02:02:37 - INFO - [51cad348-55db-43de-9338-3051e9131844] Cleaned up temporary frame directory: temp_videos/51cad348-55db-43de-9338-3051e9131844 +2025-08-19 02:02:37 - INFO - [41b3e8c9-9970-45ff-abb7-6a1f7e8964fb] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_010.mp4' +2025-08-19 02:02:37 - INFO - [41b3e8c9-9970-45ff-abb7-6a1f7e8964fb] Video saved to temporary file: temp_videos/41b3e8c9-9970-45ff-abb7-6a1f7e8964fb.mp4 +2025-08-19 02:02:37 - INFO - [41b3e8c9-9970-45ff-abb7-6a1f7e8964fb] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 02:02:42 - INFO - [41b3e8c9-9970-45ff-abb7-6a1f7e8964fb] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 02:02:42 - INFO - [41b3e8c9-9970-45ff-abb7-6a1f7e8964fb] 30 frames saved to temp_videos/41b3e8c9-9970-45ff-abb7-6a1f7e8964fb +2025-08-19 02:02:55 - INFO - vision_config is None, using default vision config +2025-08-19 02:03:22 - INFO - Tokens per second: 10.551310576211458, Peak GPU memory MB: 11824.375 +2025-08-19 02:03:22 - INFO - [41b3e8c9-9970-45ff-abb7-6a1f7e8964fb] Inference time: 44.85 seconds, CPU usage: 33.3%, CPU core utilization: [21.1, 35.7, 30.0, 46.5] +2025-08-19 02:03:22 - INFO - [41b3e8c9-9970-45ff-abb7-6a1f7e8964fb] Cleaned up temporary file: temp_videos/41b3e8c9-9970-45ff-abb7-6a1f7e8964fb.mp4 +2025-08-19 02:03:22 - INFO - [41b3e8c9-9970-45ff-abb7-6a1f7e8964fb] Cleaned up temporary frame directory: temp_videos/41b3e8c9-9970-45ff-abb7-6a1f7e8964fb +2025-08-19 02:03:22 - INFO - [4393c650-c393-4346-bc0b-a9ef5e9ff838] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_011.mp4' +2025-08-19 02:03:22 - INFO - [4393c650-c393-4346-bc0b-a9ef5e9ff838] Video saved to temporary file: temp_videos/4393c650-c393-4346-bc0b-a9ef5e9ff838.mp4 +2025-08-19 02:03:22 - INFO - [4393c650-c393-4346-bc0b-a9ef5e9ff838] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 02:03:27 - INFO - [4393c650-c393-4346-bc0b-a9ef5e9ff838] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 02:03:27 - INFO - [4393c650-c393-4346-bc0b-a9ef5e9ff838] 30 frames saved to temp_videos/4393c650-c393-4346-bc0b-a9ef5e9ff838 +2025-08-19 02:03:40 - INFO - vision_config is None, using default vision config +2025-08-19 02:04:10 - INFO - Tokens per second: 10.880078766885976, Peak GPU memory MB: 11824.375 +2025-08-19 02:04:10 - INFO - [4393c650-c393-4346-bc0b-a9ef5e9ff838] Inference time: 48.24 seconds, CPU usage: 33.2%, CPU core utilization: [44.6, 26.5, 48.9, 12.8] +2025-08-19 02:04:10 - INFO - [4393c650-c393-4346-bc0b-a9ef5e9ff838] Cleaned up temporary file: temp_videos/4393c650-c393-4346-bc0b-a9ef5e9ff838.mp4 +2025-08-19 02:04:10 - INFO - [4393c650-c393-4346-bc0b-a9ef5e9ff838] Cleaned up temporary frame directory: temp_videos/4393c650-c393-4346-bc0b-a9ef5e9ff838 +2025-08-19 02:04:10 - INFO - [63ee585b-d4ad-4f77-9c46-b1d0ee9660d5] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_012.mp4' +2025-08-19 02:04:10 - INFO - [63ee585b-d4ad-4f77-9c46-b1d0ee9660d5] Video saved to temporary file: temp_videos/63ee585b-d4ad-4f77-9c46-b1d0ee9660d5.mp4 +2025-08-19 02:04:10 - INFO - [63ee585b-d4ad-4f77-9c46-b1d0ee9660d5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 02:04:16 - INFO - [63ee585b-d4ad-4f77-9c46-b1d0ee9660d5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 02:04:16 - INFO - [63ee585b-d4ad-4f77-9c46-b1d0ee9660d5] 30 frames saved to temp_videos/63ee585b-d4ad-4f77-9c46-b1d0ee9660d5 +2025-08-19 02:04:29 - INFO - vision_config is None, using default vision config +2025-08-19 02:05:06 - INFO - Tokens per second: 11.316073008449894, Peak GPU memory MB: 11824.375 +2025-08-19 02:05:06 - INFO - [63ee585b-d4ad-4f77-9c46-b1d0ee9660d5] Inference time: 55.46 seconds, CPU usage: 32.8%, CPU core utilization: [30.7, 22.7, 65.4, 12.2] +2025-08-19 02:05:06 - INFO - [63ee585b-d4ad-4f77-9c46-b1d0ee9660d5] Cleaned up temporary file: temp_videos/63ee585b-d4ad-4f77-9c46-b1d0ee9660d5.mp4 +2025-08-19 02:05:06 - INFO - [63ee585b-d4ad-4f77-9c46-b1d0ee9660d5] Cleaned up temporary frame directory: temp_videos/63ee585b-d4ad-4f77-9c46-b1d0ee9660d5 +2025-08-19 02:05:06 - INFO - [cd415598-0457-4f7c-9c0e-c48a9a385529] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_013.mp4' +2025-08-19 02:05:06 - INFO - [cd415598-0457-4f7c-9c0e-c48a9a385529] Video saved to temporary file: temp_videos/cd415598-0457-4f7c-9c0e-c48a9a385529.mp4 +2025-08-19 02:05:06 - INFO - [cd415598-0457-4f7c-9c0e-c48a9a385529] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 02:05:11 - INFO - [cd415598-0457-4f7c-9c0e-c48a9a385529] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 02:05:11 - INFO - [cd415598-0457-4f7c-9c0e-c48a9a385529] 30 frames saved to temp_videos/cd415598-0457-4f7c-9c0e-c48a9a385529 +2025-08-19 02:05:24 - INFO - vision_config is None, using default vision config +2025-08-19 02:05:48 - INFO - Tokens per second: 10.139085446565758, Peak GPU memory MB: 11824.375 +2025-08-19 02:05:48 - INFO - [cd415598-0457-4f7c-9c0e-c48a9a385529] Inference time: 41.62 seconds, CPU usage: 34.4%, CPU core utilization: [43.3, 14.6, 67.4, 12.2] +2025-08-19 02:05:48 - INFO - [cd415598-0457-4f7c-9c0e-c48a9a385529] Cleaned up temporary file: temp_videos/cd415598-0457-4f7c-9c0e-c48a9a385529.mp4 +2025-08-19 02:05:48 - INFO - [cd415598-0457-4f7c-9c0e-c48a9a385529] Cleaned up temporary frame directory: temp_videos/cd415598-0457-4f7c-9c0e-c48a9a385529 +2025-08-19 02:05:48 - INFO - [d767da5c-73c4-4e2f-b5bd-53f01d5e199e] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_014.mp4' +2025-08-19 02:05:48 - INFO - [d767da5c-73c4-4e2f-b5bd-53f01d5e199e] Video saved to temporary file: temp_videos/d767da5c-73c4-4e2f-b5bd-53f01d5e199e.mp4 +2025-08-19 02:05:48 - INFO - [d767da5c-73c4-4e2f-b5bd-53f01d5e199e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 02:05:52 - INFO - [d767da5c-73c4-4e2f-b5bd-53f01d5e199e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 02:05:52 - INFO - [d767da5c-73c4-4e2f-b5bd-53f01d5e199e] 30 frames saved to temp_videos/d767da5c-73c4-4e2f-b5bd-53f01d5e199e +2025-08-19 02:06:05 - INFO - vision_config is None, using default vision config +2025-08-19 02:06:29 - INFO - Tokens per second: 10.099402253183086, Peak GPU memory MB: 11824.375 +2025-08-19 02:06:29 - INFO - [d767da5c-73c4-4e2f-b5bd-53f01d5e199e] Inference time: 41.11 seconds, CPU usage: 33.9%, CPU core utilization: [40.9, 18.6, 62.6, 13.5] +2025-08-19 02:06:29 - INFO - [d767da5c-73c4-4e2f-b5bd-53f01d5e199e] Cleaned up temporary file: temp_videos/d767da5c-73c4-4e2f-b5bd-53f01d5e199e.mp4 +2025-08-19 02:06:29 - INFO - [d767da5c-73c4-4e2f-b5bd-53f01d5e199e] Cleaned up temporary frame directory: temp_videos/d767da5c-73c4-4e2f-b5bd-53f01d5e199e +2025-08-19 02:06:29 - INFO - [e58b05d0-3bc8-4979-b14b-942d95c2e3ef] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_015.mp4' +2025-08-19 02:06:29 - INFO - [e58b05d0-3bc8-4979-b14b-942d95c2e3ef] Video saved to temporary file: temp_videos/e58b05d0-3bc8-4979-b14b-942d95c2e3ef.mp4 +2025-08-19 02:06:29 - INFO - [e58b05d0-3bc8-4979-b14b-942d95c2e3ef] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 02:06:34 - INFO - [e58b05d0-3bc8-4979-b14b-942d95c2e3ef] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 02:06:34 - INFO - [e58b05d0-3bc8-4979-b14b-942d95c2e3ef] 30 frames saved to temp_videos/e58b05d0-3bc8-4979-b14b-942d95c2e3ef +2025-08-19 02:06:47 - INFO - vision_config is None, using default vision config +2025-08-19 02:07:19 - INFO - Tokens per second: 11.047986460564802, Peak GPU memory MB: 11824.375 +2025-08-19 02:07:19 - INFO - [e58b05d0-3bc8-4979-b14b-942d95c2e3ef] Inference time: 50.63 seconds, CPU usage: 33.6%, CPU core utilization: [34.3, 14.5, 72.1, 13.3] +2025-08-19 02:07:20 - INFO - [e58b05d0-3bc8-4979-b14b-942d95c2e3ef] Cleaned up temporary file: temp_videos/e58b05d0-3bc8-4979-b14b-942d95c2e3ef.mp4 +2025-08-19 02:07:20 - INFO - [e58b05d0-3bc8-4979-b14b-942d95c2e3ef] Cleaned up temporary frame directory: temp_videos/e58b05d0-3bc8-4979-b14b-942d95c2e3ef +2025-08-19 02:07:20 - INFO - [7f388f20-df20-4494-81cf-2c4dbbb55c24] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_016.mp4' +2025-08-19 02:07:20 - INFO - [7f388f20-df20-4494-81cf-2c4dbbb55c24] Video saved to temporary file: temp_videos/7f388f20-df20-4494-81cf-2c4dbbb55c24.mp4 +2025-08-19 02:07:20 - INFO - [7f388f20-df20-4494-81cf-2c4dbbb55c24] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 02:07:25 - INFO - [7f388f20-df20-4494-81cf-2c4dbbb55c24] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 02:07:25 - INFO - [7f388f20-df20-4494-81cf-2c4dbbb55c24] 30 frames saved to temp_videos/7f388f20-df20-4494-81cf-2c4dbbb55c24 +2025-08-19 02:07:38 - INFO - vision_config is None, using default vision config +2025-08-19 02:08:03 - INFO - Tokens per second: 10.276083296878193, Peak GPU memory MB: 11824.375 +2025-08-19 02:08:03 - INFO - [7f388f20-df20-4494-81cf-2c4dbbb55c24] Inference time: 42.98 seconds, CPU usage: 34.5%, CPU core utilization: [48.9, 17.9, 49.7, 21.6] +2025-08-19 02:08:03 - INFO - [7f388f20-df20-4494-81cf-2c4dbbb55c24] Cleaned up temporary file: temp_videos/7f388f20-df20-4494-81cf-2c4dbbb55c24.mp4 +2025-08-19 02:08:03 - INFO - [7f388f20-df20-4494-81cf-2c4dbbb55c24] Cleaned up temporary frame directory: temp_videos/7f388f20-df20-4494-81cf-2c4dbbb55c24 +2025-08-19 02:08:03 - INFO - [065e7051-f3d7-44d7-9df4-3ec85f833683] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_017.mp4' +2025-08-19 02:08:03 - INFO - [065e7051-f3d7-44d7-9df4-3ec85f833683] Video saved to temporary file: temp_videos/065e7051-f3d7-44d7-9df4-3ec85f833683.mp4 +2025-08-19 02:08:03 - INFO - [065e7051-f3d7-44d7-9df4-3ec85f833683] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 02:08:08 - INFO - [065e7051-f3d7-44d7-9df4-3ec85f833683] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 02:08:08 - INFO - [065e7051-f3d7-44d7-9df4-3ec85f833683] 30 frames saved to temp_videos/065e7051-f3d7-44d7-9df4-3ec85f833683 +2025-08-19 02:08:21 - INFO - vision_config is None, using default vision config +2025-08-19 02:08:43 - INFO - Tokens per second: 9.871157401870102, Peak GPU memory MB: 11824.375 +2025-08-19 02:08:43 - INFO - [065e7051-f3d7-44d7-9df4-3ec85f833683] Inference time: 40.32 seconds, CPU usage: 34.5%, CPU core utilization: [28.7, 13.0, 82.0, 14.2] +2025-08-19 02:08:43 - INFO - [065e7051-f3d7-44d7-9df4-3ec85f833683] Cleaned up temporary file: temp_videos/065e7051-f3d7-44d7-9df4-3ec85f833683.mp4 +2025-08-19 02:08:43 - INFO - [065e7051-f3d7-44d7-9df4-3ec85f833683] Cleaned up temporary frame directory: temp_videos/065e7051-f3d7-44d7-9df4-3ec85f833683 +2025-08-19 02:08:43 - INFO - [3a448a91-9d31-4350-a00a-5e0d9a3e65c5] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_018.mp4' +2025-08-19 02:08:43 - INFO - [3a448a91-9d31-4350-a00a-5e0d9a3e65c5] Video saved to temporary file: temp_videos/3a448a91-9d31-4350-a00a-5e0d9a3e65c5.mp4 +2025-08-19 02:08:43 - INFO - [3a448a91-9d31-4350-a00a-5e0d9a3e65c5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 02:08:48 - INFO - [3a448a91-9d31-4350-a00a-5e0d9a3e65c5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 02:08:48 - INFO - [3a448a91-9d31-4350-a00a-5e0d9a3e65c5] 30 frames saved to temp_videos/3a448a91-9d31-4350-a00a-5e0d9a3e65c5 +2025-08-19 02:09:01 - INFO - vision_config is None, using default vision config +2025-08-19 02:09:27 - INFO - Tokens per second: 10.527802025402586, Peak GPU memory MB: 11824.375 +2025-08-19 02:09:27 - INFO - [3a448a91-9d31-4350-a00a-5e0d9a3e65c5] Inference time: 44.41 seconds, CPU usage: 33.7%, CPU core utilization: [38.7, 49.8, 27.9, 18.3] +2025-08-19 02:09:27 - INFO - [3a448a91-9d31-4350-a00a-5e0d9a3e65c5] Cleaned up temporary file: temp_videos/3a448a91-9d31-4350-a00a-5e0d9a3e65c5.mp4 +2025-08-19 02:09:27 - INFO - [3a448a91-9d31-4350-a00a-5e0d9a3e65c5] Cleaned up temporary frame directory: temp_videos/3a448a91-9d31-4350-a00a-5e0d9a3e65c5 +2025-08-19 02:09:28 - INFO - [f86185e1-3df4-4b24-87c3-25b7006b3847] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_019.mp4' +2025-08-19 02:09:28 - INFO - [f86185e1-3df4-4b24-87c3-25b7006b3847] Video saved to temporary file: temp_videos/f86185e1-3df4-4b24-87c3-25b7006b3847.mp4 +2025-08-19 02:09:28 - INFO - [f86185e1-3df4-4b24-87c3-25b7006b3847] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 02:09:33 - INFO - [f86185e1-3df4-4b24-87c3-25b7006b3847] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 02:09:33 - INFO - [f86185e1-3df4-4b24-87c3-25b7006b3847] 30 frames saved to temp_videos/f86185e1-3df4-4b24-87c3-25b7006b3847 +2025-08-19 02:09:46 - INFO - vision_config is None, using default vision config +2025-08-19 02:10:09 - INFO - Tokens per second: 9.991467008425984, Peak GPU memory MB: 11824.375 +2025-08-19 02:10:09 - INFO - [f86185e1-3df4-4b24-87c3-25b7006b3847] Inference time: 41.05 seconds, CPU usage: 34.8%, CPU core utilization: [27.7, 55.4, 40.1, 16.1] +2025-08-19 02:10:09 - INFO - [f86185e1-3df4-4b24-87c3-25b7006b3847] Cleaned up temporary file: temp_videos/f86185e1-3df4-4b24-87c3-25b7006b3847.mp4 +2025-08-19 02:10:09 - INFO - [f86185e1-3df4-4b24-87c3-25b7006b3847] Cleaned up temporary frame directory: temp_videos/f86185e1-3df4-4b24-87c3-25b7006b3847 +2025-08-19 02:10:09 - INFO - [86d1ce1c-a659-4370-8d5e-2a04d68fb02d] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_020.mp4' +2025-08-19 02:10:09 - INFO - [86d1ce1c-a659-4370-8d5e-2a04d68fb02d] Video saved to temporary file: temp_videos/86d1ce1c-a659-4370-8d5e-2a04d68fb02d.mp4 +2025-08-19 02:10:09 - INFO - [86d1ce1c-a659-4370-8d5e-2a04d68fb02d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 02:10:14 - INFO - [86d1ce1c-a659-4370-8d5e-2a04d68fb02d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 02:10:14 - INFO - [86d1ce1c-a659-4370-8d5e-2a04d68fb02d] 30 frames saved to temp_videos/86d1ce1c-a659-4370-8d5e-2a04d68fb02d +2025-08-19 02:10:27 - INFO - vision_config is None, using default vision config +2025-08-19 02:10:56 - INFO - Tokens per second: 10.789004911780786, Peak GPU memory MB: 11824.375 +2025-08-19 02:10:56 - INFO - [86d1ce1c-a659-4370-8d5e-2a04d68fb02d] Inference time: 47.12 seconds, CPU usage: 33.5%, CPU core utilization: [26.3, 11.4, 57.7, 38.4] +2025-08-19 02:10:56 - INFO - [86d1ce1c-a659-4370-8d5e-2a04d68fb02d] Cleaned up temporary file: temp_videos/86d1ce1c-a659-4370-8d5e-2a04d68fb02d.mp4 +2025-08-19 02:10:56 - INFO - [86d1ce1c-a659-4370-8d5e-2a04d68fb02d] Cleaned up temporary frame directory: temp_videos/86d1ce1c-a659-4370-8d5e-2a04d68fb02d +2025-08-19 02:10:56 - INFO - [bbf0d618-6d38-4015-a288-e1ed0f93febb] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_021.mp4' +2025-08-19 02:10:56 - INFO - [bbf0d618-6d38-4015-a288-e1ed0f93febb] Video saved to temporary file: temp_videos/bbf0d618-6d38-4015-a288-e1ed0f93febb.mp4 +2025-08-19 02:10:56 - INFO - [bbf0d618-6d38-4015-a288-e1ed0f93febb] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 02:11:01 - INFO - [bbf0d618-6d38-4015-a288-e1ed0f93febb] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 02:11:01 - INFO - [bbf0d618-6d38-4015-a288-e1ed0f93febb] 30 frames saved to temp_videos/bbf0d618-6d38-4015-a288-e1ed0f93febb +2025-08-19 02:11:14 - INFO - vision_config is None, using default vision config +2025-08-19 02:11:42 - INFO - Tokens per second: 10.710302542840308, Peak GPU memory MB: 11824.375 +2025-08-19 02:11:42 - INFO - [bbf0d618-6d38-4015-a288-e1ed0f93febb] Inference time: 46.57 seconds, CPU usage: 33.3%, CPU core utilization: [35.4, 27.4, 14.8, 55.5] +2025-08-19 02:11:42 - INFO - [bbf0d618-6d38-4015-a288-e1ed0f93febb] Cleaned up temporary file: temp_videos/bbf0d618-6d38-4015-a288-e1ed0f93febb.mp4 +2025-08-19 02:11:42 - INFO - [bbf0d618-6d38-4015-a288-e1ed0f93febb] Cleaned up temporary frame directory: temp_videos/bbf0d618-6d38-4015-a288-e1ed0f93febb diff --git a/API_Transformers/logs/MiniCPM-V-4/20250820_233455.log b/API_Transformers/logs/MiniCPM-V-4/20250820_233455.log new file mode 100644 index 0000000000000000000000000000000000000000..87f794c47df30011896b14d897f1afc7cb1772a6 --- /dev/null +++ b/API_Transformers/logs/MiniCPM-V-4/20250820_233455.log @@ -0,0 +1,742 @@ +2025-08-20 23:34:55 - INFO - Loading model: openbmb/MiniCPM-V-4 +2025-08-20 23:34:56 - INFO - vision_config is None, using default vision config +2025-08-20 23:36:01 - INFO - Model loaded in 65.64 seconds +2025-08-20 23:36:01 - INFO - GPU Memory Usage after model load: 7802.99 MB +2025-08-20 23:36:30 - INFO - [5373d18b-66ad-4a38-b02b-dbac9e400b89] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. 
Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_001.mp4' +2025-08-20 23:36:30 - INFO - [5373d18b-66ad-4a38-b02b-dbac9e400b89] Video saved to temporary file: temp_videos/5373d18b-66ad-4a38-b02b-dbac9e400b89.mp4 +2025-08-20 23:36:30 - INFO - [5373d18b-66ad-4a38-b02b-dbac9e400b89] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:36:36 - INFO - [5373d18b-66ad-4a38-b02b-dbac9e400b89] Extracted 30 frames successfully. Saving to temporary files... +2025-08-20 23:36:36 - INFO - [5373d18b-66ad-4a38-b02b-dbac9e400b89] 30 frames saved to temp_videos/5373d18b-66ad-4a38-b02b-dbac9e400b89 +2025-08-20 23:36:53 - INFO - vision_config is None, using default vision config +2025-08-20 23:37:12 - INFO - Tokens per second: 8.779788779248046, Peak GPU memory MB: 11824.375 +2025-08-20 23:37:12 - INFO - [5373d18b-66ad-4a38-b02b-dbac9e400b89] Inference time: 41.20 seconds, CPU usage: 20.4%, CPU core utilization: [14.8, 25.6, 19.6, 21.6] +2025-08-20 23:37:12 - INFO - [5373d18b-66ad-4a38-b02b-dbac9e400b89] Cleaned up temporary frame directory: temp_videos/5373d18b-66ad-4a38-b02b-dbac9e400b89 +2025-08-20 23:37:12 - INFO - [cbcd04dd-9447-4344-812e-e4387f036fea] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_002.mp4' +2025-08-20 23:37:12 - INFO - [cbcd04dd-9447-4344-812e-e4387f036fea] Video saved to temporary file: temp_videos/cbcd04dd-9447-4344-812e-e4387f036fea.mp4 +2025-08-20 23:37:12 - INFO - [cbcd04dd-9447-4344-812e-e4387f036fea] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:37:17 - INFO - [cbcd04dd-9447-4344-812e-e4387f036fea] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:37:17 - INFO - [cbcd04dd-9447-4344-812e-e4387f036fea] 30 frames saved to temp_videos/cbcd04dd-9447-4344-812e-e4387f036fea +2025-08-20 23:37:29 - INFO - vision_config is None, using default vision config +2025-08-20 23:37:45 - INFO - Tokens per second: 8.238054802466655, Peak GPU memory MB: 11824.375 +2025-08-20 23:37:45 - INFO - [cbcd04dd-9447-4344-812e-e4387f036fea] Inference time: 33.71 seconds, CPU usage: 35.8%, CPU core utilization: [31.6, 59.3, 28.2, 24.1] +2025-08-20 23:37:45 - INFO - [cbcd04dd-9447-4344-812e-e4387f036fea] Cleaned up temporary frame directory: temp_videos/cbcd04dd-9447-4344-812e-e4387f036fea +2025-08-20 23:37:45 - INFO - [4ca8090e-ea72-431d-aa92-756575a05665] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_003.mp4' +2025-08-20 23:37:45 - INFO - [4ca8090e-ea72-431d-aa92-756575a05665] Video saved to temporary file: temp_videos/4ca8090e-ea72-431d-aa92-756575a05665.mp4 +2025-08-20 23:37:45 - INFO - [4ca8090e-ea72-431d-aa92-756575a05665] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:37:50 - INFO - [4ca8090e-ea72-431d-aa92-756575a05665] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:37:50 - INFO - [4ca8090e-ea72-431d-aa92-756575a05665] 30 frames saved to temp_videos/4ca8090e-ea72-431d-aa92-756575a05665 +2025-08-20 23:38:03 - INFO - vision_config is None, using default vision config +2025-08-20 23:38:26 - INFO - Tokens per second: 9.842206491452542, Peak GPU memory MB: 11824.375 +2025-08-20 23:38:26 - INFO - [4ca8090e-ea72-431d-aa92-756575a05665] Inference time: 40.18 seconds, CPU usage: 34.2%, CPU core utilization: [42.5, 14.8, 64.8, 14.6] +2025-08-20 23:38:26 - INFO - [4ca8090e-ea72-431d-aa92-756575a05665] Cleaned up temporary frame directory: temp_videos/4ca8090e-ea72-431d-aa92-756575a05665 +2025-08-20 23:38:26 - INFO - [e7c9965e-02c8-4472-bc73-adb425fc488d] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_004.mp4' +2025-08-20 23:38:26 - INFO - [e7c9965e-02c8-4472-bc73-adb425fc488d] Video saved to temporary file: temp_videos/e7c9965e-02c8-4472-bc73-adb425fc488d.mp4 +2025-08-20 23:38:26 - INFO - [e7c9965e-02c8-4472-bc73-adb425fc488d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:38:30 - INFO - [e7c9965e-02c8-4472-bc73-adb425fc488d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:38:30 - INFO - [e7c9965e-02c8-4472-bc73-adb425fc488d] 30 frames saved to temp_videos/e7c9965e-02c8-4472-bc73-adb425fc488d +2025-08-20 23:38:43 - INFO - vision_config is None, using default vision config +2025-08-20 23:38:57 - INFO - Tokens per second: 7.223886604016651, Peak GPU memory MB: 11824.375 +2025-08-20 23:38:57 - INFO - [e7c9965e-02c8-4472-bc73-adb425fc488d] Inference time: 31.57 seconds, CPU usage: 36.1%, CPU core utilization: [35.9, 19.2, 72.5, 16.5] +2025-08-20 23:38:57 - INFO - [e7c9965e-02c8-4472-bc73-adb425fc488d] Cleaned up temporary frame directory: temp_videos/e7c9965e-02c8-4472-bc73-adb425fc488d +2025-08-20 23:38:57 - INFO - [42ed726f-5d5b-4a66-bebd-387f3916eea3] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_005.mp4' +2025-08-20 23:38:57 - INFO - [42ed726f-5d5b-4a66-bebd-387f3916eea3] Video saved to temporary file: temp_videos/42ed726f-5d5b-4a66-bebd-387f3916eea3.mp4 +2025-08-20 23:38:57 - INFO - [42ed726f-5d5b-4a66-bebd-387f3916eea3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:39:02 - INFO - [42ed726f-5d5b-4a66-bebd-387f3916eea3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:39:02 - INFO - [42ed726f-5d5b-4a66-bebd-387f3916eea3] 30 frames saved to temp_videos/42ed726f-5d5b-4a66-bebd-387f3916eea3 +2025-08-20 23:39:15 - INFO - vision_config is None, using default vision config +2025-08-20 23:39:35 - INFO - Tokens per second: 9.414676082668533, Peak GPU memory MB: 11824.375 +2025-08-20 23:39:35 - INFO - [42ed726f-5d5b-4a66-bebd-387f3916eea3] Inference time: 38.23 seconds, CPU usage: 34.6%, CPU core utilization: [34.4, 30.3, 14.1, 59.5] +2025-08-20 23:39:35 - INFO - [42ed726f-5d5b-4a66-bebd-387f3916eea3] Cleaned up temporary frame directory: temp_videos/42ed726f-5d5b-4a66-bebd-387f3916eea3 +2025-08-20 23:39:35 - INFO - [770b60c1-2ad5-4fc8-a042-36f7397d63c8] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_006.mp4' +2025-08-20 23:39:35 - INFO - [770b60c1-2ad5-4fc8-a042-36f7397d63c8] Video saved to temporary file: temp_videos/770b60c1-2ad5-4fc8-a042-36f7397d63c8.mp4 +2025-08-20 23:39:35 - INFO - [770b60c1-2ad5-4fc8-a042-36f7397d63c8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:39:40 - INFO - [770b60c1-2ad5-4fc8-a042-36f7397d63c8] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:39:40 - INFO - [770b60c1-2ad5-4fc8-a042-36f7397d63c8] 30 frames saved to temp_videos/770b60c1-2ad5-4fc8-a042-36f7397d63c8 +2025-08-20 23:39:53 - INFO - vision_config is None, using default vision config +2025-08-20 23:40:12 - INFO - Tokens per second: 9.018116223758131, Peak GPU memory MB: 11824.375 +2025-08-20 23:40:12 - INFO - [770b60c1-2ad5-4fc8-a042-36f7397d63c8] Inference time: 36.60 seconds, CPU usage: 35.1%, CPU core utilization: [47.3, 19.3, 54.1, 19.6] +2025-08-20 23:40:12 - INFO - [770b60c1-2ad5-4fc8-a042-36f7397d63c8] Cleaned up temporary frame directory: temp_videos/770b60c1-2ad5-4fc8-a042-36f7397d63c8 +2025-08-20 23:40:12 - INFO - [979c0d57-545b-43cd-8601-b2ae280d1197] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_007.mp4' +2025-08-20 23:40:12 - INFO - [979c0d57-545b-43cd-8601-b2ae280d1197] Video saved to temporary file: temp_videos/979c0d57-545b-43cd-8601-b2ae280d1197.mp4 +2025-08-20 23:40:12 - INFO - [979c0d57-545b-43cd-8601-b2ae280d1197] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:40:17 - INFO - [979c0d57-545b-43cd-8601-b2ae280d1197] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:40:17 - INFO - [979c0d57-545b-43cd-8601-b2ae280d1197] 30 frames saved to temp_videos/979c0d57-545b-43cd-8601-b2ae280d1197 +2025-08-20 23:40:30 - INFO - vision_config is None, using default vision config +2025-08-20 23:40:46 - INFO - Tokens per second: 8.128884269772154, Peak GPU memory MB: 11824.375 +2025-08-20 23:40:46 - INFO - [979c0d57-545b-43cd-8601-b2ae280d1197] Inference time: 33.74 seconds, CPU usage: 35.5%, CPU core utilization: [29.4, 48.5, 49.0, 15.1] +2025-08-20 23:40:46 - INFO - [979c0d57-545b-43cd-8601-b2ae280d1197] Cleaned up temporary frame directory: temp_videos/979c0d57-545b-43cd-8601-b2ae280d1197 +2025-08-20 23:40:46 - INFO - [09341564-481c-4f01-b9ae-fd64f197af41] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_008.mp4' +2025-08-20 23:40:46 - INFO - [09341564-481c-4f01-b9ae-fd64f197af41] Video saved to temporary file: temp_videos/09341564-481c-4f01-b9ae-fd64f197af41.mp4 +2025-08-20 23:40:46 - INFO - [09341564-481c-4f01-b9ae-fd64f197af41] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:40:51 - INFO - [09341564-481c-4f01-b9ae-fd64f197af41] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:40:51 - INFO - [09341564-481c-4f01-b9ae-fd64f197af41] 30 frames saved to temp_videos/09341564-481c-4f01-b9ae-fd64f197af41 +2025-08-20 23:41:04 - INFO - vision_config is None, using default vision config +2025-08-20 23:41:20 - INFO - Tokens per second: 8.104748907769563, Peak GPU memory MB: 11824.375 +2025-08-20 23:41:20 - INFO - [09341564-481c-4f01-b9ae-fd64f197af41] Inference time: 33.69 seconds, CPU usage: 35.7%, CPU core utilization: [22.5, 24.9, 45.2, 50.3] +2025-08-20 23:41:20 - INFO - [09341564-481c-4f01-b9ae-fd64f197af41] Cleaned up temporary frame directory: temp_videos/09341564-481c-4f01-b9ae-fd64f197af41 +2025-08-20 23:41:20 - INFO - [152baa13-549f-4cd5-a9fd-b9caa193427c] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_009.mp4' +2025-08-20 23:41:20 - INFO - [152baa13-549f-4cd5-a9fd-b9caa193427c] Video saved to temporary file: temp_videos/152baa13-549f-4cd5-a9fd-b9caa193427c.mp4 +2025-08-20 23:41:20 - INFO - [152baa13-549f-4cd5-a9fd-b9caa193427c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:41:24 - INFO - [152baa13-549f-4cd5-a9fd-b9caa193427c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:41:24 - INFO - [152baa13-549f-4cd5-a9fd-b9caa193427c] 30 frames saved to temp_videos/152baa13-549f-4cd5-a9fd-b9caa193427c +2025-08-20 23:41:37 - INFO - vision_config is None, using default vision config +2025-08-20 23:41:54 - INFO - Tokens per second: 8.260490846413186, Peak GPU memory MB: 11824.375 +2025-08-20 23:41:54 - INFO - [152baa13-549f-4cd5-a9fd-b9caa193427c] Inference time: 34.09 seconds, CPU usage: 35.2%, CPU core utilization: [44.4, 20.5, 41.4, 34.6] +2025-08-20 23:41:54 - INFO - [152baa13-549f-4cd5-a9fd-b9caa193427c] Cleaned up temporary frame directory: temp_videos/152baa13-549f-4cd5-a9fd-b9caa193427c +2025-08-20 23:41:54 - INFO - [f16dda49-2af2-4b53-b80c-e152a2424314] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_010.mp4' +2025-08-20 23:41:54 - INFO - [f16dda49-2af2-4b53-b80c-e152a2424314] Video saved to temporary file: temp_videos/f16dda49-2af2-4b53-b80c-e152a2424314.mp4 +2025-08-20 23:41:54 - INFO - [f16dda49-2af2-4b53-b80c-e152a2424314] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:41:58 - INFO - [f16dda49-2af2-4b53-b80c-e152a2424314] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:41:58 - INFO - [f16dda49-2af2-4b53-b80c-e152a2424314] 30 frames saved to temp_videos/f16dda49-2af2-4b53-b80c-e152a2424314 +2025-08-20 23:42:11 - INFO - vision_config is None, using default vision config +2025-08-20 23:42:28 - INFO - Tokens per second: 8.474433175082025, Peak GPU memory MB: 11824.375 +2025-08-20 23:42:28 - INFO - [f16dda49-2af2-4b53-b80c-e152a2424314] Inference time: 34.75 seconds, CPU usage: 35.3%, CPU core utilization: [40.2, 15.6, 70.7, 14.8] +2025-08-20 23:42:28 - INFO - [f16dda49-2af2-4b53-b80c-e152a2424314] Cleaned up temporary frame directory: temp_videos/f16dda49-2af2-4b53-b80c-e152a2424314 +2025-08-20 23:42:28 - INFO - [4ad4941a-d5b6-4fef-bf4e-4ba8bf29ac55] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_011.mp4' +2025-08-20 23:42:28 - INFO - [4ad4941a-d5b6-4fef-bf4e-4ba8bf29ac55] Video saved to temporary file: temp_videos/4ad4941a-d5b6-4fef-bf4e-4ba8bf29ac55.mp4 +2025-08-20 23:42:28 - INFO - [4ad4941a-d5b6-4fef-bf4e-4ba8bf29ac55] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:42:33 - INFO - [4ad4941a-d5b6-4fef-bf4e-4ba8bf29ac55] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:42:33 - INFO - [4ad4941a-d5b6-4fef-bf4e-4ba8bf29ac55] 30 frames saved to temp_videos/4ad4941a-d5b6-4fef-bf4e-4ba8bf29ac55 +2025-08-20 23:42:46 - INFO - vision_config is None, using default vision config +2025-08-20 23:43:00 - INFO - Tokens per second: 7.281456041328239, Peak GPU memory MB: 11824.375 +2025-08-20 23:43:00 - INFO - [4ad4941a-d5b6-4fef-bf4e-4ba8bf29ac55] Inference time: 31.78 seconds, CPU usage: 35.9%, CPU core utilization: [41.3, 59.3, 27.5, 15.5] +2025-08-20 23:43:00 - INFO - [4ad4941a-d5b6-4fef-bf4e-4ba8bf29ac55] Cleaned up temporary frame directory: temp_videos/4ad4941a-d5b6-4fef-bf4e-4ba8bf29ac55 +2025-08-20 23:43:00 - INFO - [b91fc444-5706-4fc6-b7e3-97045044f1ad] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_012.mp4' +2025-08-20 23:43:00 - INFO - [b91fc444-5706-4fc6-b7e3-97045044f1ad] Video saved to temporary file: temp_videos/b91fc444-5706-4fc6-b7e3-97045044f1ad.mp4 +2025-08-20 23:43:00 - INFO - [b91fc444-5706-4fc6-b7e3-97045044f1ad] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:43:05 - INFO - [b91fc444-5706-4fc6-b7e3-97045044f1ad] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:43:05 - INFO - [b91fc444-5706-4fc6-b7e3-97045044f1ad] 30 frames saved to temp_videos/b91fc444-5706-4fc6-b7e3-97045044f1ad +2025-08-20 23:43:18 - INFO - vision_config is None, using default vision config +2025-08-20 23:43:34 - INFO - Tokens per second: 8.229193657182282, Peak GPU memory MB: 11824.375 +2025-08-20 23:43:34 - INFO - [b91fc444-5706-4fc6-b7e3-97045044f1ad] Inference time: 34.07 seconds, CPU usage: 35.4%, CPU core utilization: [17.3, 58.4, 14.4, 51.7] +2025-08-20 23:43:34 - INFO - [b91fc444-5706-4fc6-b7e3-97045044f1ad] Cleaned up temporary frame directory: temp_videos/b91fc444-5706-4fc6-b7e3-97045044f1ad +2025-08-20 23:43:34 - INFO - [8470205f-d443-4e1d-9371-99a2ab5f76b6] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_013.mp4' +2025-08-20 23:43:34 - INFO - [8470205f-d443-4e1d-9371-99a2ab5f76b6] Video saved to temporary file: temp_videos/8470205f-d443-4e1d-9371-99a2ab5f76b6.mp4 +2025-08-20 23:43:34 - INFO - [8470205f-d443-4e1d-9371-99a2ab5f76b6] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:43:39 - INFO - [8470205f-d443-4e1d-9371-99a2ab5f76b6] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:43:39 - INFO - [8470205f-d443-4e1d-9371-99a2ab5f76b6] 30 frames saved to temp_videos/8470205f-d443-4e1d-9371-99a2ab5f76b6 +2025-08-20 23:43:52 - INFO - vision_config is None, using default vision config +2025-08-20 23:44:08 - INFO - Tokens per second: 7.933309315527597, Peak GPU memory MB: 11824.375 +2025-08-20 23:44:08 - INFO - [8470205f-d443-4e1d-9371-99a2ab5f76b6] Inference time: 33.32 seconds, CPU usage: 35.5%, CPU core utilization: [59.0, 27.1, 15.4, 40.4] +2025-08-20 23:44:08 - INFO - [8470205f-d443-4e1d-9371-99a2ab5f76b6] Cleaned up temporary frame directory: temp_videos/8470205f-d443-4e1d-9371-99a2ab5f76b6 +2025-08-20 23:44:08 - INFO - [87989d50-7108-4eea-a2e2-3ac68ce850b0] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_014.mp4' +2025-08-20 23:44:08 - INFO - [87989d50-7108-4eea-a2e2-3ac68ce850b0] Video saved to temporary file: temp_videos/87989d50-7108-4eea-a2e2-3ac68ce850b0.mp4 +2025-08-20 23:44:08 - INFO - [87989d50-7108-4eea-a2e2-3ac68ce850b0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:44:13 - INFO - [87989d50-7108-4eea-a2e2-3ac68ce850b0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:44:13 - INFO - [87989d50-7108-4eea-a2e2-3ac68ce850b0] 30 frames saved to temp_videos/87989d50-7108-4eea-a2e2-3ac68ce850b0 +2025-08-20 23:44:25 - INFO - vision_config is None, using default vision config +2025-08-20 23:44:47 - INFO - Tokens per second: 9.6318698479325, Peak GPU memory MB: 11824.375 +2025-08-20 23:44:47 - INFO - [87989d50-7108-4eea-a2e2-3ac68ce850b0] Inference time: 39.46 seconds, CPU usage: 34.5%, CPU core utilization: [46.5, 21.2, 28.6, 41.7] +2025-08-20 23:44:47 - INFO - [87989d50-7108-4eea-a2e2-3ac68ce850b0] Cleaned up temporary frame directory: temp_videos/87989d50-7108-4eea-a2e2-3ac68ce850b0 +2025-08-20 23:44:47 - INFO - [2e1a26e8-2e97-4635-9479-94580fac857d] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_015.mp4' +2025-08-20 23:44:47 - INFO - [2e1a26e8-2e97-4635-9479-94580fac857d] Video saved to temporary file: temp_videos/2e1a26e8-2e97-4635-9479-94580fac857d.mp4 +2025-08-20 23:44:47 - INFO - [2e1a26e8-2e97-4635-9479-94580fac857d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:44:52 - INFO - [2e1a26e8-2e97-4635-9479-94580fac857d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:44:52 - INFO - [2e1a26e8-2e97-4635-9479-94580fac857d] 30 frames saved to temp_videos/2e1a26e8-2e97-4635-9479-94580fac857d +2025-08-20 23:45:05 - INFO - vision_config is None, using default vision config +2025-08-20 23:45:19 - INFO - Tokens per second: 7.41501152980709, Peak GPU memory MB: 11824.375 +2025-08-20 23:45:19 - INFO - [2e1a26e8-2e97-4635-9479-94580fac857d] Inference time: 32.04 seconds, CPU usage: 35.9%, CPU core utilization: [44.7, 31.3, 28.4, 39.1] +2025-08-20 23:45:19 - INFO - [2e1a26e8-2e97-4635-9479-94580fac857d] Cleaned up temporary frame directory: temp_videos/2e1a26e8-2e97-4635-9479-94580fac857d +2025-08-20 23:45:19 - INFO - [e846a73a-a898-4957-8799-5b00b759bd1c] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_016.mp4' +2025-08-20 23:45:19 - INFO - [e846a73a-a898-4957-8799-5b00b759bd1c] Video saved to temporary file: temp_videos/e846a73a-a898-4957-8799-5b00b759bd1c.mp4 +2025-08-20 23:45:19 - INFO - [e846a73a-a898-4957-8799-5b00b759bd1c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:45:24 - INFO - [e846a73a-a898-4957-8799-5b00b759bd1c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:45:24 - INFO - [e846a73a-a898-4957-8799-5b00b759bd1c] 30 frames saved to temp_videos/e846a73a-a898-4957-8799-5b00b759bd1c +2025-08-20 23:45:37 - INFO - vision_config is None, using default vision config +2025-08-20 23:45:57 - INFO - Tokens per second: 9.31005578568323, Peak GPU memory MB: 11824.375 +2025-08-20 23:45:57 - INFO - [e846a73a-a898-4957-8799-5b00b759bd1c] Inference time: 37.84 seconds, CPU usage: 34.4%, CPU core utilization: [26.5, 43.5, 51.5, 16.2] +2025-08-20 23:45:57 - INFO - [e846a73a-a898-4957-8799-5b00b759bd1c] Cleaned up temporary frame directory: temp_videos/e846a73a-a898-4957-8799-5b00b759bd1c +2025-08-20 23:45:57 - INFO - [e95f483f-4174-421b-b85c-177358e58486] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_017.mp4' +2025-08-20 23:45:57 - INFO - [e95f483f-4174-421b-b85c-177358e58486] Video saved to temporary file: temp_videos/e95f483f-4174-421b-b85c-177358e58486.mp4 +2025-08-20 23:45:57 - INFO - [e95f483f-4174-421b-b85c-177358e58486] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:46:02 - INFO - [e95f483f-4174-421b-b85c-177358e58486] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:46:02 - INFO - [e95f483f-4174-421b-b85c-177358e58486] 30 frames saved to temp_videos/e95f483f-4174-421b-b85c-177358e58486 +2025-08-20 23:46:15 - INFO - vision_config is None, using default vision config +2025-08-20 23:46:29 - INFO - Tokens per second: 7.603712717977961, Peak GPU memory MB: 11824.375 +2025-08-20 23:46:29 - INFO - [e95f483f-4174-421b-b85c-177358e58486] Inference time: 32.46 seconds, CPU usage: 36.2%, CPU core utilization: [34.5, 20.6, 74.3, 15.2] +2025-08-20 23:46:29 - INFO - [e95f483f-4174-421b-b85c-177358e58486] Cleaned up temporary frame directory: temp_videos/e95f483f-4174-421b-b85c-177358e58486 +2025-08-20 23:46:29 - INFO - [0fae1f1a-6dfb-4b0e-aaa9-b36a4e6ab08b] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_018.mp4' +2025-08-20 23:46:29 - INFO - [0fae1f1a-6dfb-4b0e-aaa9-b36a4e6ab08b] Video saved to temporary file: temp_videos/0fae1f1a-6dfb-4b0e-aaa9-b36a4e6ab08b.mp4 +2025-08-20 23:46:29 - INFO - [0fae1f1a-6dfb-4b0e-aaa9-b36a4e6ab08b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:46:37 - INFO - [0fae1f1a-6dfb-4b0e-aaa9-b36a4e6ab08b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:46:37 - INFO - [0fae1f1a-6dfb-4b0e-aaa9-b36a4e6ab08b] 30 frames saved to temp_videos/0fae1f1a-6dfb-4b0e-aaa9-b36a4e6ab08b +2025-08-20 23:46:50 - INFO - vision_config is None, using default vision config +2025-08-20 23:47:11 - INFO - Tokens per second: 9.605931622305029, Peak GPU memory MB: 11824.375 +2025-08-20 23:47:11 - INFO - [0fae1f1a-6dfb-4b0e-aaa9-b36a4e6ab08b] Inference time: 41.81 seconds, CPU usage: 53.0%, CPU core utilization: [47.2, 51.0, 44.8, 69.2] +2025-08-20 23:47:11 - INFO - [0fae1f1a-6dfb-4b0e-aaa9-b36a4e6ab08b] Cleaned up temporary frame directory: temp_videos/0fae1f1a-6dfb-4b0e-aaa9-b36a4e6ab08b +2025-08-20 23:47:11 - INFO - [1317b652-49c6-45bf-bbb5-329fa9fd9572] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_019.mp4' +2025-08-20 23:47:11 - INFO - [1317b652-49c6-45bf-bbb5-329fa9fd9572] Video saved to temporary file: temp_videos/1317b652-49c6-45bf-bbb5-329fa9fd9572.mp4 +2025-08-20 23:47:11 - INFO - [1317b652-49c6-45bf-bbb5-329fa9fd9572] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:47:16 - INFO - [1317b652-49c6-45bf-bbb5-329fa9fd9572] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:47:16 - INFO - [1317b652-49c6-45bf-bbb5-329fa9fd9572] 30 frames saved to temp_videos/1317b652-49c6-45bf-bbb5-329fa9fd9572 +2025-08-20 23:47:29 - INFO - vision_config is None, using default vision config +2025-08-20 23:47:42 - INFO - Tokens per second: 6.553943613195835, Peak GPU memory MB: 11824.375 +2025-08-20 23:47:42 - INFO - [1317b652-49c6-45bf-bbb5-329fa9fd9572] Inference time: 30.42 seconds, CPU usage: 36.3%, CPU core utilization: [28.7, 56.6, 44.1, 15.8] +2025-08-20 23:47:42 - INFO - [1317b652-49c6-45bf-bbb5-329fa9fd9572] Cleaned up temporary frame directory: temp_videos/1317b652-49c6-45bf-bbb5-329fa9fd9572 +2025-08-20 23:47:42 - INFO - [dc27d7c0-3dad-49d5-aa0b-79cc072fe0ef] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_020.mp4' +2025-08-20 23:47:42 - INFO - [dc27d7c0-3dad-49d5-aa0b-79cc072fe0ef] Video saved to temporary file: temp_videos/dc27d7c0-3dad-49d5-aa0b-79cc072fe0ef.mp4 +2025-08-20 23:47:42 - INFO - [dc27d7c0-3dad-49d5-aa0b-79cc072fe0ef] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:47:47 - INFO - [dc27d7c0-3dad-49d5-aa0b-79cc072fe0ef] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:47:47 - INFO - [dc27d7c0-3dad-49d5-aa0b-79cc072fe0ef] 30 frames saved to temp_videos/dc27d7c0-3dad-49d5-aa0b-79cc072fe0ef +2025-08-20 23:48:00 - INFO - vision_config is None, using default vision config +2025-08-20 23:48:15 - INFO - Tokens per second: 7.993319182461253, Peak GPU memory MB: 11824.375 +2025-08-20 23:48:15 - INFO - [dc27d7c0-3dad-49d5-aa0b-79cc072fe0ef] Inference time: 33.46 seconds, CPU usage: 35.9%, CPU core utilization: [55.1, 38.2, 24.9, 25.4] +2025-08-20 23:48:15 - INFO - [dc27d7c0-3dad-49d5-aa0b-79cc072fe0ef] Cleaned up temporary frame directory: temp_videos/dc27d7c0-3dad-49d5-aa0b-79cc072fe0ef +2025-08-20 23:48:15 - INFO - [2d2b338a-5383-48df-a50c-a5469d4bf2a2] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_021.mp4' +2025-08-20 23:48:15 - INFO - [2d2b338a-5383-48df-a50c-a5469d4bf2a2] Video saved to temporary file: temp_videos/2d2b338a-5383-48df-a50c-a5469d4bf2a2.mp4 +2025-08-20 23:48:15 - INFO - [2d2b338a-5383-48df-a50c-a5469d4bf2a2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:48:20 - INFO - [2d2b338a-5383-48df-a50c-a5469d4bf2a2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:48:20 - INFO - [2d2b338a-5383-48df-a50c-a5469d4bf2a2] 30 frames saved to temp_videos/2d2b338a-5383-48df-a50c-a5469d4bf2a2 +2025-08-20 23:48:33 - INFO - vision_config is None, using default vision config +2025-08-20 23:48:50 - INFO - Tokens per second: 8.475114910409703, Peak GPU memory MB: 11824.375 +2025-08-20 23:48:50 - INFO - [2d2b338a-5383-48df-a50c-a5469d4bf2a2] Inference time: 34.69 seconds, CPU usage: 35.2%, CPU core utilization: [23.3, 27.9, 41.5, 48.1] +2025-08-20 23:48:50 - INFO - [2d2b338a-5383-48df-a50c-a5469d4bf2a2] Cleaned up temporary frame directory: temp_videos/2d2b338a-5383-48df-a50c-a5469d4bf2a2 +2025-08-20 23:48:50 - INFO - [91c7b79b-21ef-4745-a10e-22a03a1abd0c] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_022.mp4' +2025-08-20 23:48:50 - INFO - [91c7b79b-21ef-4745-a10e-22a03a1abd0c] Video saved to temporary file: temp_videos/91c7b79b-21ef-4745-a10e-22a03a1abd0c.mp4 +2025-08-20 23:48:50 - INFO - [91c7b79b-21ef-4745-a10e-22a03a1abd0c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:48:55 - INFO - [91c7b79b-21ef-4745-a10e-22a03a1abd0c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:48:55 - INFO - [91c7b79b-21ef-4745-a10e-22a03a1abd0c] 30 frames saved to temp_videos/91c7b79b-21ef-4745-a10e-22a03a1abd0c +2025-08-20 23:49:08 - INFO - vision_config is None, using default vision config +2025-08-20 23:49:23 - INFO - Tokens per second: 7.968927639160867, Peak GPU memory MB: 11824.375 +2025-08-20 23:49:23 - INFO - [91c7b79b-21ef-4745-a10e-22a03a1abd0c] Inference time: 33.28 seconds, CPU usage: 35.7%, CPU core utilization: [63.9, 25.4, 38.6, 14.6] +2025-08-20 23:49:23 - INFO - [91c7b79b-21ef-4745-a10e-22a03a1abd0c] Cleaned up temporary frame directory: temp_videos/91c7b79b-21ef-4745-a10e-22a03a1abd0c +2025-08-20 23:49:23 - INFO - [5139cda6-0da8-43e9-8f70-5adf1590238b] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_023.mp4' +2025-08-20 23:49:23 - INFO - [5139cda6-0da8-43e9-8f70-5adf1590238b] Video saved to temporary file: temp_videos/5139cda6-0da8-43e9-8f70-5adf1590238b.mp4 +2025-08-20 23:49:23 - INFO - [5139cda6-0da8-43e9-8f70-5adf1590238b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:49:28 - INFO - [5139cda6-0da8-43e9-8f70-5adf1590238b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:49:28 - INFO - [5139cda6-0da8-43e9-8f70-5adf1590238b] 30 frames saved to temp_videos/5139cda6-0da8-43e9-8f70-5adf1590238b +2025-08-20 23:49:41 - INFO - vision_config is None, using default vision config +2025-08-20 23:49:55 - INFO - Tokens per second: 7.454720470818721, Peak GPU memory MB: 11824.375 +2025-08-20 23:49:55 - INFO - [5139cda6-0da8-43e9-8f70-5adf1590238b] Inference time: 32.14 seconds, CPU usage: 35.9%, CPU core utilization: [54.3, 22.4, 50.0, 17.0] +2025-08-20 23:49:55 - INFO - [5139cda6-0da8-43e9-8f70-5adf1590238b] Cleaned up temporary frame directory: temp_videos/5139cda6-0da8-43e9-8f70-5adf1590238b +2025-08-20 23:49:55 - INFO - [b92496ee-5a36-490d-aadb-2f06439d7995] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_024.mp4' +2025-08-20 23:49:55 - INFO - [b92496ee-5a36-490d-aadb-2f06439d7995] Video saved to temporary file: temp_videos/b92496ee-5a36-490d-aadb-2f06439d7995.mp4 +2025-08-20 23:49:55 - INFO - [b92496ee-5a36-490d-aadb-2f06439d7995] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:50:00 - INFO - [b92496ee-5a36-490d-aadb-2f06439d7995] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:50:00 - INFO - [b92496ee-5a36-490d-aadb-2f06439d7995] 30 frames saved to temp_videos/b92496ee-5a36-490d-aadb-2f06439d7995 +2025-08-20 23:50:13 - INFO - vision_config is None, using default vision config +2025-08-20 23:50:29 - INFO - Tokens per second: 8.25678903581856, Peak GPU memory MB: 11824.375 +2025-08-20 23:50:29 - INFO - [b92496ee-5a36-490d-aadb-2f06439d7995] Inference time: 34.09 seconds, CPU usage: 35.5%, CPU core utilization: [51.5, 45.5, 16.6, 28.4] +2025-08-20 23:50:29 - INFO - [b92496ee-5a36-490d-aadb-2f06439d7995] Cleaned up temporary frame directory: temp_videos/b92496ee-5a36-490d-aadb-2f06439d7995 +2025-08-20 23:50:29 - INFO - [4ecf6faf-4096-491c-8670-2c39714858dc] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_025.mp4' +2025-08-20 23:50:29 - INFO - [4ecf6faf-4096-491c-8670-2c39714858dc] Video saved to temporary file: temp_videos/4ecf6faf-4096-491c-8670-2c39714858dc.mp4 +2025-08-20 23:50:29 - INFO - [4ecf6faf-4096-491c-8670-2c39714858dc] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:50:34 - INFO - [4ecf6faf-4096-491c-8670-2c39714858dc] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:50:34 - INFO - [4ecf6faf-4096-491c-8670-2c39714858dc] 30 frames saved to temp_videos/4ecf6faf-4096-491c-8670-2c39714858dc +2025-08-20 23:50:47 - INFO - vision_config is None, using default vision config +2025-08-20 23:51:17 - INFO - Tokens per second: 10.736432842240825, Peak GPU memory MB: 11824.375 +2025-08-20 23:51:17 - INFO - [4ecf6faf-4096-491c-8670-2c39714858dc] Inference time: 47.95 seconds, CPU usage: 33.5%, CPU core utilization: [38.6, 16.2, 63.7, 15.3] +2025-08-20 23:51:17 - INFO - [4ecf6faf-4096-491c-8670-2c39714858dc] Cleaned up temporary frame directory: temp_videos/4ecf6faf-4096-491c-8670-2c39714858dc +2025-08-20 23:54:07 - INFO - [380eb612-eca5-49dd-b2d1-bbdeb5ecf54c] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_001.mp4' +2025-08-20 23:54:07 - INFO - [380eb612-eca5-49dd-b2d1-bbdeb5ecf54c] Video saved to temporary file: temp_videos/380eb612-eca5-49dd-b2d1-bbdeb5ecf54c.mp4 +2025-08-20 23:54:07 - INFO - [380eb612-eca5-49dd-b2d1-bbdeb5ecf54c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:54:12 - INFO - [380eb612-eca5-49dd-b2d1-bbdeb5ecf54c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:54:12 - INFO - [380eb612-eca5-49dd-b2d1-bbdeb5ecf54c] 30 frames saved to temp_videos/380eb612-eca5-49dd-b2d1-bbdeb5ecf54c +2025-08-20 23:54:25 - INFO - vision_config is None, using default vision config +2025-08-20 23:54:37 - INFO - Tokens per second: 6.4951065299349615, Peak GPU memory MB: 11824.375 +2025-08-20 23:54:37 - INFO - [380eb612-eca5-49dd-b2d1-bbdeb5ecf54c] Inference time: 29.85 seconds, CPU usage: 6.8%, CPU core utilization: [6.3, 7.3, 10.0, 3.6] +2025-08-20 23:54:37 - INFO - [380eb612-eca5-49dd-b2d1-bbdeb5ecf54c] Cleaned up temporary frame directory: temp_videos/380eb612-eca5-49dd-b2d1-bbdeb5ecf54c +2025-08-20 23:54:37 - INFO - [aee27771-15f4-4e39-89c7-953aaf2d6435] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_002.mp4' +2025-08-20 23:54:37 - INFO - [aee27771-15f4-4e39-89c7-953aaf2d6435] Video saved to temporary file: temp_videos/aee27771-15f4-4e39-89c7-953aaf2d6435.mp4 +2025-08-20 23:54:37 - INFO - [aee27771-15f4-4e39-89c7-953aaf2d6435] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:54:42 - INFO - [aee27771-15f4-4e39-89c7-953aaf2d6435] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:54:42 - INFO - [aee27771-15f4-4e39-89c7-953aaf2d6435] 30 frames saved to temp_videos/aee27771-15f4-4e39-89c7-953aaf2d6435 +2025-08-20 23:54:55 - INFO - vision_config is None, using default vision config +2025-08-20 23:55:04 - INFO - Tokens per second: 4.620365146287069, Peak GPU memory MB: 11824.375 +2025-08-20 23:55:04 - INFO - [aee27771-15f4-4e39-89c7-953aaf2d6435] Inference time: 27.55 seconds, CPU usage: 37.7%, CPU core utilization: [23.5, 33.0, 45.6, 48.7] +2025-08-20 23:55:04 - INFO - [aee27771-15f4-4e39-89c7-953aaf2d6435] Cleaned up temporary frame directory: temp_videos/aee27771-15f4-4e39-89c7-953aaf2d6435 +2025-08-20 23:55:04 - INFO - [33034474-6cc6-44ea-bf4e-595b33e0a842] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_003.mp4' +2025-08-20 23:55:04 - INFO - [33034474-6cc6-44ea-bf4e-595b33e0a842] Video saved to temporary file: temp_videos/33034474-6cc6-44ea-bf4e-595b33e0a842.mp4 +2025-08-20 23:55:04 - INFO - [33034474-6cc6-44ea-bf4e-595b33e0a842] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:55:09 - INFO - [33034474-6cc6-44ea-bf4e-595b33e0a842] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:55:09 - INFO - [33034474-6cc6-44ea-bf4e-595b33e0a842] 30 frames saved to temp_videos/33034474-6cc6-44ea-bf4e-595b33e0a842 +2025-08-20 23:55:22 - INFO - vision_config is None, using default vision config +2025-08-20 23:55:35 - INFO - Tokens per second: 6.8004162288880075, Peak GPU memory MB: 11824.375 +2025-08-20 23:55:35 - INFO - [33034474-6cc6-44ea-bf4e-595b33e0a842] Inference time: 30.57 seconds, CPU usage: 36.6%, CPU core utilization: [23.5, 34.8, 48.2, 39.9] +2025-08-20 23:55:35 - INFO - [33034474-6cc6-44ea-bf4e-595b33e0a842] Cleaned up temporary frame directory: temp_videos/33034474-6cc6-44ea-bf4e-595b33e0a842 +2025-08-20 23:55:35 - INFO - [08e94b4d-f36d-44c5-b12a-d657bad86595] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_004.mp4' +2025-08-20 23:55:35 - INFO - [08e94b4d-f36d-44c5-b12a-d657bad86595] Video saved to temporary file: temp_videos/08e94b4d-f36d-44c5-b12a-d657bad86595.mp4 +2025-08-20 23:55:35 - INFO - [08e94b4d-f36d-44c5-b12a-d657bad86595] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:55:40 - INFO - [08e94b4d-f36d-44c5-b12a-d657bad86595] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:55:40 - INFO - [08e94b4d-f36d-44c5-b12a-d657bad86595] 30 frames saved to temp_videos/08e94b4d-f36d-44c5-b12a-d657bad86595 +2025-08-20 23:55:53 - INFO - vision_config is None, using default vision config +2025-08-20 23:56:04 - INFO - Tokens per second: 5.72115634822065, Peak GPU memory MB: 11824.375 +2025-08-20 23:56:04 - INFO - [08e94b4d-f36d-44c5-b12a-d657bad86595] Inference time: 29.03 seconds, CPU usage: 36.6%, CPU core utilization: [18.2, 59.9, 52.6, 16.0] +2025-08-20 23:56:04 - INFO - [08e94b4d-f36d-44c5-b12a-d657bad86595] Cleaned up temporary frame directory: temp_videos/08e94b4d-f36d-44c5-b12a-d657bad86595 +2025-08-20 23:56:04 - INFO - [aeba9bb8-25b3-44c0-8827-e08f9da881f5] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_005.mp4' +2025-08-20 23:56:04 - INFO - [aeba9bb8-25b3-44c0-8827-e08f9da881f5] Video saved to temporary file: temp_videos/aeba9bb8-25b3-44c0-8827-e08f9da881f5.mp4 +2025-08-20 23:56:04 - INFO - [aeba9bb8-25b3-44c0-8827-e08f9da881f5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:56:09 - INFO - [aeba9bb8-25b3-44c0-8827-e08f9da881f5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:56:09 - INFO - [aeba9bb8-25b3-44c0-8827-e08f9da881f5] 30 frames saved to temp_videos/aeba9bb8-25b3-44c0-8827-e08f9da881f5 +2025-08-20 23:56:22 - INFO - vision_config is None, using default vision config +2025-08-20 23:56:32 - INFO - Tokens per second: 4.8389257523545774, Peak GPU memory MB: 11824.375 +2025-08-20 23:56:32 - INFO - [aeba9bb8-25b3-44c0-8827-e08f9da881f5] Inference time: 28.02 seconds, CPU usage: 37.2%, CPU core utilization: [18.8, 20.2, 51.9, 57.8] +2025-08-20 23:56:32 - INFO - [aeba9bb8-25b3-44c0-8827-e08f9da881f5] Cleaned up temporary frame directory: temp_videos/aeba9bb8-25b3-44c0-8827-e08f9da881f5 +2025-08-20 23:56:32 - INFO - [f59d2020-42d3-4106-ba4a-822cd7a913b9] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_006.mp4' +2025-08-20 23:56:32 - INFO - [f59d2020-42d3-4106-ba4a-822cd7a913b9] Video saved to temporary file: temp_videos/f59d2020-42d3-4106-ba4a-822cd7a913b9.mp4 +2025-08-20 23:56:32 - INFO - [f59d2020-42d3-4106-ba4a-822cd7a913b9] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:56:37 - INFO - [f59d2020-42d3-4106-ba4a-822cd7a913b9] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:56:37 - INFO - [f59d2020-42d3-4106-ba4a-822cd7a913b9] 30 frames saved to temp_videos/f59d2020-42d3-4106-ba4a-822cd7a913b9 +2025-08-20 23:56:50 - INFO - vision_config is None, using default vision config +2025-08-20 23:57:01 - INFO - Tokens per second: 5.491475730408376, Peak GPU memory MB: 11824.375 +2025-08-20 23:57:01 - INFO - [f59d2020-42d3-4106-ba4a-822cd7a913b9] Inference time: 28.85 seconds, CPU usage: 36.8%, CPU core utilization: [19.5, 63.2, 31.2, 33.4] +2025-08-20 23:57:01 - INFO - [f59d2020-42d3-4106-ba4a-822cd7a913b9] Cleaned up temporary frame directory: temp_videos/f59d2020-42d3-4106-ba4a-822cd7a913b9 +2025-08-20 23:57:01 - INFO - [10f4ed0e-10c6-4a0e-baef-cbc5a15a174c] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_007.mp4' +2025-08-20 23:57:01 - INFO - [10f4ed0e-10c6-4a0e-baef-cbc5a15a174c] Video saved to temporary file: temp_videos/10f4ed0e-10c6-4a0e-baef-cbc5a15a174c.mp4 +2025-08-20 23:57:01 - INFO - [10f4ed0e-10c6-4a0e-baef-cbc5a15a174c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:57:06 - INFO - [10f4ed0e-10c6-4a0e-baef-cbc5a15a174c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:57:06 - INFO - [10f4ed0e-10c6-4a0e-baef-cbc5a15a174c] 30 frames saved to temp_videos/10f4ed0e-10c6-4a0e-baef-cbc5a15a174c +2025-08-20 23:57:19 - INFO - vision_config is None, using default vision config +2025-08-20 23:57:30 - INFO - Tokens per second: 5.435457465325606, Peak GPU memory MB: 11824.375 +2025-08-20 23:57:30 - INFO - [10f4ed0e-10c6-4a0e-baef-cbc5a15a174c] Inference time: 28.75 seconds, CPU usage: 37.6%, CPU core utilization: [28.1, 30.6, 71.4, 20.3] +2025-08-20 23:57:30 - INFO - [10f4ed0e-10c6-4a0e-baef-cbc5a15a174c] Cleaned up temporary frame directory: temp_videos/10f4ed0e-10c6-4a0e-baef-cbc5a15a174c +2025-08-20 23:57:30 - INFO - [5a373da4-23c9-4001-93b0-2b33e1830f66] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_008.mp4' +2025-08-20 23:57:30 - INFO - [5a373da4-23c9-4001-93b0-2b33e1830f66] Video saved to temporary file: temp_videos/5a373da4-23c9-4001-93b0-2b33e1830f66.mp4 +2025-08-20 23:57:30 - INFO - [5a373da4-23c9-4001-93b0-2b33e1830f66] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:57:35 - INFO - [5a373da4-23c9-4001-93b0-2b33e1830f66] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:57:35 - INFO - [5a373da4-23c9-4001-93b0-2b33e1830f66] 30 frames saved to temp_videos/5a373da4-23c9-4001-93b0-2b33e1830f66 +2025-08-20 23:57:48 - INFO - vision_config is None, using default vision config +2025-08-20 23:57:58 - INFO - Tokens per second: 4.895421462414148, Peak GPU memory MB: 11824.375 +2025-08-20 23:57:58 - INFO - [5a373da4-23c9-4001-93b0-2b33e1830f66] Inference time: 28.19 seconds, CPU usage: 37.4%, CPU core utilization: [27.1, 35.4, 25.4, 61.8] +2025-08-20 23:57:58 - INFO - [5a373da4-23c9-4001-93b0-2b33e1830f66] Cleaned up temporary frame directory: temp_videos/5a373da4-23c9-4001-93b0-2b33e1830f66 +2025-08-20 23:57:58 - INFO - [3b4f5fd8-cb88-4655-b977-6866088b99b9] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_009.mp4' +2025-08-20 23:57:58 - INFO - [3b4f5fd8-cb88-4655-b977-6866088b99b9] Video saved to temporary file: temp_videos/3b4f5fd8-cb88-4655-b977-6866088b99b9.mp4 +2025-08-20 23:57:58 - INFO - [3b4f5fd8-cb88-4655-b977-6866088b99b9] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:58:03 - INFO - [3b4f5fd8-cb88-4655-b977-6866088b99b9] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:58:03 - INFO - [3b4f5fd8-cb88-4655-b977-6866088b99b9] 30 frames saved to temp_videos/3b4f5fd8-cb88-4655-b977-6866088b99b9 +2025-08-20 23:58:16 - INFO - vision_config is None, using default vision config +2025-08-20 23:58:28 - INFO - Tokens per second: 6.246345549948331, Peak GPU memory MB: 11824.375 +2025-08-20 23:58:28 - INFO - [3b4f5fd8-cb88-4655-b977-6866088b99b9] Inference time: 29.96 seconds, CPU usage: 36.8%, CPU core utilization: [36.0, 58.0, 37.8, 15.2] +2025-08-20 23:58:28 - INFO - [3b4f5fd8-cb88-4655-b977-6866088b99b9] Cleaned up temporary frame directory: temp_videos/3b4f5fd8-cb88-4655-b977-6866088b99b9 +2025-08-20 23:58:28 - INFO - [db892fe2-bfd0-4e5d-9afe-9740e936d083] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_010.mp4' +2025-08-20 23:58:28 - INFO - [db892fe2-bfd0-4e5d-9afe-9740e936d083] Video saved to temporary file: temp_videos/db892fe2-bfd0-4e5d-9afe-9740e936d083.mp4 +2025-08-20 23:58:28 - INFO - [db892fe2-bfd0-4e5d-9afe-9740e936d083] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:58:33 - INFO - [db892fe2-bfd0-4e5d-9afe-9740e936d083] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:58:33 - INFO - [db892fe2-bfd0-4e5d-9afe-9740e936d083] 30 frames saved to temp_videos/db892fe2-bfd0-4e5d-9afe-9740e936d083 +2025-08-20 23:58:46 - INFO - vision_config is None, using default vision config +2025-08-20 23:59:01 - INFO - Tokens per second: 7.823522467010842, Peak GPU memory MB: 11824.375 +2025-08-20 23:59:01 - INFO - [db892fe2-bfd0-4e5d-9afe-9740e936d083] Inference time: 32.94 seconds, CPU usage: 35.7%, CPU core utilization: [52.5, 23.9, 22.6, 43.7] +2025-08-20 23:59:01 - INFO - [db892fe2-bfd0-4e5d-9afe-9740e936d083] Cleaned up temporary frame directory: temp_videos/db892fe2-bfd0-4e5d-9afe-9740e936d083 +2025-08-20 23:59:01 - INFO - [2c42d84f-5110-43d0-8b9a-70073da685d1] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_011.mp4' +2025-08-20 23:59:01 - INFO - [2c42d84f-5110-43d0-8b9a-70073da685d1] Video saved to temporary file: temp_videos/2c42d84f-5110-43d0-8b9a-70073da685d1.mp4 +2025-08-20 23:59:01 - INFO - [2c42d84f-5110-43d0-8b9a-70073da685d1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:59:06 - INFO - [2c42d84f-5110-43d0-8b9a-70073da685d1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:59:06 - INFO - [2c42d84f-5110-43d0-8b9a-70073da685d1] 30 frames saved to temp_videos/2c42d84f-5110-43d0-8b9a-70073da685d1 +2025-08-20 23:59:19 - INFO - vision_config is None, using default vision config +2025-08-20 23:59:30 - INFO - Tokens per second: 5.483772695220153, Peak GPU memory MB: 11824.375 +2025-08-20 23:59:30 - INFO - [2c42d84f-5110-43d0-8b9a-70073da685d1] Inference time: 28.90 seconds, CPU usage: 37.2%, CPU core utilization: [18.5, 41.5, 21.7, 67.3] +2025-08-20 23:59:30 - INFO - [2c42d84f-5110-43d0-8b9a-70073da685d1] Cleaned up temporary frame directory: temp_videos/2c42d84f-5110-43d0-8b9a-70073da685d1 +2025-08-20 23:59:30 - INFO - [2872bdf2-bc5e-418d-8cae-07911540d748] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_012.mp4' +2025-08-20 23:59:30 - INFO - [2872bdf2-bc5e-418d-8cae-07911540d748] Video saved to temporary file: temp_videos/2872bdf2-bc5e-418d-8cae-07911540d748.mp4 +2025-08-20 23:59:30 - INFO - [2872bdf2-bc5e-418d-8cae-07911540d748] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-20 23:59:35 - INFO - [2872bdf2-bc5e-418d-8cae-07911540d748] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-20 23:59:35 - INFO - [2872bdf2-bc5e-418d-8cae-07911540d748] 30 frames saved to temp_videos/2872bdf2-bc5e-418d-8cae-07911540d748 +2025-08-20 23:59:48 - INFO - vision_config is None, using default vision config +2025-08-21 00:00:00 - INFO - Tokens per second: 6.375310581123721, Peak GPU memory MB: 11824.375 +2025-08-21 00:00:00 - INFO - [2872bdf2-bc5e-418d-8cae-07911540d748] Inference time: 30.13 seconds, CPU usage: 37.0%, CPU core utilization: [32.4, 49.7, 41.4, 24.4] +2025-08-21 00:00:00 - INFO - [2872bdf2-bc5e-418d-8cae-07911540d748] Cleaned up temporary frame directory: temp_videos/2872bdf2-bc5e-418d-8cae-07911540d748 +2025-08-21 00:00:00 - INFO - [32bd674d-1546-4df6-9cb2-910789260e3b] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_013.mp4' +2025-08-21 00:00:00 - INFO - [32bd674d-1546-4df6-9cb2-910789260e3b] Video saved to temporary file: temp_videos/32bd674d-1546-4df6-9cb2-910789260e3b.mp4 +2025-08-21 00:00:00 - INFO - [32bd674d-1546-4df6-9cb2-910789260e3b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:00:05 - INFO - [32bd674d-1546-4df6-9cb2-910789260e3b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:00:05 - INFO - [32bd674d-1546-4df6-9cb2-910789260e3b] 30 frames saved to temp_videos/32bd674d-1546-4df6-9cb2-910789260e3b +2025-08-21 00:00:18 - INFO - vision_config is None, using default vision config +2025-08-21 00:00:30 - INFO - Tokens per second: 6.371663181689808, Peak GPU memory MB: 11824.375 +2025-08-21 00:00:30 - INFO - [32bd674d-1546-4df6-9cb2-910789260e3b] Inference time: 30.19 seconds, CPU usage: 38.7%, CPU core utilization: [25.3, 24.5, 51.5, 53.1] +2025-08-21 00:00:30 - INFO - [32bd674d-1546-4df6-9cb2-910789260e3b] Cleaned up temporary frame directory: temp_videos/32bd674d-1546-4df6-9cb2-910789260e3b +2025-08-21 00:00:30 - INFO - [b8254b3e-b84d-4015-b0f1-46653b45a403] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_014.mp4' +2025-08-21 00:00:30 - INFO - [b8254b3e-b84d-4015-b0f1-46653b45a403] Video saved to temporary file: temp_videos/b8254b3e-b84d-4015-b0f1-46653b45a403.mp4 +2025-08-21 00:00:30 - INFO - [b8254b3e-b84d-4015-b0f1-46653b45a403] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:00:35 - INFO - [b8254b3e-b84d-4015-b0f1-46653b45a403] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:00:35 - INFO - [b8254b3e-b84d-4015-b0f1-46653b45a403] 30 frames saved to temp_videos/b8254b3e-b84d-4015-b0f1-46653b45a403 +2025-08-21 00:00:48 - INFO - vision_config is None, using default vision config +2025-08-21 00:01:00 - INFO - Tokens per second: 6.242465092882211, Peak GPU memory MB: 11824.375 +2025-08-21 00:01:00 - INFO - [b8254b3e-b84d-4015-b0f1-46653b45a403] Inference time: 29.93 seconds, CPU usage: 36.6%, CPU core utilization: [43.4, 16.4, 27.8, 58.6] +2025-08-21 00:01:00 - INFO - [b8254b3e-b84d-4015-b0f1-46653b45a403] Cleaned up temporary frame directory: temp_videos/b8254b3e-b84d-4015-b0f1-46653b45a403 +2025-08-21 00:01:00 - INFO - [b9ea4590-07b7-4d5d-934d-ee43d847cbe4] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_015.mp4' +2025-08-21 00:01:00 - INFO - [b9ea4590-07b7-4d5d-934d-ee43d847cbe4] Video saved to temporary file: temp_videos/b9ea4590-07b7-4d5d-934d-ee43d847cbe4.mp4 +2025-08-21 00:01:00 - INFO - [b9ea4590-07b7-4d5d-934d-ee43d847cbe4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:01:05 - INFO - [b9ea4590-07b7-4d5d-934d-ee43d847cbe4] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:01:05 - INFO - [b9ea4590-07b7-4d5d-934d-ee43d847cbe4] 30 frames saved to temp_videos/b9ea4590-07b7-4d5d-934d-ee43d847cbe4 +2025-08-21 00:01:18 - INFO - vision_config is None, using default vision config +2025-08-21 00:01:31 - INFO - Tokens per second: 6.8003448292571615, Peak GPU memory MB: 11824.375 +2025-08-21 00:01:31 - INFO - [b9ea4590-07b7-4d5d-934d-ee43d847cbe4] Inference time: 30.82 seconds, CPU usage: 36.4%, CPU core utilization: [76.2, 16.3, 36.4, 16.7] +2025-08-21 00:01:31 - INFO - [b9ea4590-07b7-4d5d-934d-ee43d847cbe4] Cleaned up temporary frame directory: temp_videos/b9ea4590-07b7-4d5d-934d-ee43d847cbe4 +2025-08-21 00:01:31 - INFO - [8570af4d-c6dc-4f35-9db8-610d6f2e46d8] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_016.mp4' +2025-08-21 00:01:31 - INFO - [8570af4d-c6dc-4f35-9db8-610d6f2e46d8] Video saved to temporary file: temp_videos/8570af4d-c6dc-4f35-9db8-610d6f2e46d8.mp4 +2025-08-21 00:01:31 - INFO - [8570af4d-c6dc-4f35-9db8-610d6f2e46d8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:01:36 - INFO - [8570af4d-c6dc-4f35-9db8-610d6f2e46d8] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:01:36 - INFO - [8570af4d-c6dc-4f35-9db8-610d6f2e46d8] 30 frames saved to temp_videos/8570af4d-c6dc-4f35-9db8-610d6f2e46d8 +2025-08-21 00:01:49 - INFO - vision_config is None, using default vision config +2025-08-21 00:02:01 - INFO - Tokens per second: 6.328218691218011, Peak GPU memory MB: 11824.375 +2025-08-21 00:02:01 - INFO - [8570af4d-c6dc-4f35-9db8-610d6f2e46d8] Inference time: 30.10 seconds, CPU usage: 36.5%, CPU core utilization: [24.4, 22.4, 15.9, 83.2] +2025-08-21 00:02:01 - INFO - [8570af4d-c6dc-4f35-9db8-610d6f2e46d8] Cleaned up temporary frame directory: temp_videos/8570af4d-c6dc-4f35-9db8-610d6f2e46d8 +2025-08-21 00:02:01 - INFO - [576c0d47-06ea-44af-8aa5-2463af7c8cf7] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_017.mp4' +2025-08-21 00:02:01 - INFO - [576c0d47-06ea-44af-8aa5-2463af7c8cf7] Video saved to temporary file: temp_videos/576c0d47-06ea-44af-8aa5-2463af7c8cf7.mp4 +2025-08-21 00:02:01 - INFO - [576c0d47-06ea-44af-8aa5-2463af7c8cf7] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:02:06 - INFO - [576c0d47-06ea-44af-8aa5-2463af7c8cf7] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:02:06 - INFO - [576c0d47-06ea-44af-8aa5-2463af7c8cf7] 30 frames saved to temp_videos/576c0d47-06ea-44af-8aa5-2463af7c8cf7 +2025-08-21 00:02:19 - INFO - vision_config is None, using default vision config +2025-08-21 00:02:30 - INFO - Tokens per second: 5.366089856961263, Peak GPU memory MB: 11824.375 +2025-08-21 00:02:30 - INFO - [576c0d47-06ea-44af-8aa5-2463af7c8cf7] Inference time: 28.73 seconds, CPU usage: 37.3%, CPU core utilization: [35.5, 35.8, 41.0, 36.9] +2025-08-21 00:02:30 - INFO - [576c0d47-06ea-44af-8aa5-2463af7c8cf7] Cleaned up temporary frame directory: temp_videos/576c0d47-06ea-44af-8aa5-2463af7c8cf7 +2025-08-21 00:02:30 - INFO - [247d4e66-1ef1-4d9e-9fa5-4fa5c66aa016] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_018.mp4' +2025-08-21 00:02:30 - INFO - [247d4e66-1ef1-4d9e-9fa5-4fa5c66aa016] Video saved to temporary file: temp_videos/247d4e66-1ef1-4d9e-9fa5-4fa5c66aa016.mp4 +2025-08-21 00:02:30 - INFO - [247d4e66-1ef1-4d9e-9fa5-4fa5c66aa016] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:02:35 - INFO - [247d4e66-1ef1-4d9e-9fa5-4fa5c66aa016] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:02:35 - INFO - [247d4e66-1ef1-4d9e-9fa5-4fa5c66aa016] 30 frames saved to temp_videos/247d4e66-1ef1-4d9e-9fa5-4fa5c66aa016 +2025-08-21 00:02:48 - INFO - vision_config is None, using default vision config +2025-08-21 00:03:01 - INFO - Tokens per second: 7.211379440337794, Peak GPU memory MB: 11824.375 +2025-08-21 00:03:01 - INFO - [247d4e66-1ef1-4d9e-9fa5-4fa5c66aa016] Inference time: 31.65 seconds, CPU usage: 36.4%, CPU core utilization: [47.5, 23.3, 51.7, 23.0] +2025-08-21 00:03:01 - INFO - [247d4e66-1ef1-4d9e-9fa5-4fa5c66aa016] Cleaned up temporary frame directory: temp_videos/247d4e66-1ef1-4d9e-9fa5-4fa5c66aa016 +2025-08-21 00:03:02 - INFO - [84e77116-c87b-4b39-8001-7954bd84f3fd] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_019.mp4' +2025-08-21 00:03:02 - INFO - [84e77116-c87b-4b39-8001-7954bd84f3fd] Video saved to temporary file: temp_videos/84e77116-c87b-4b39-8001-7954bd84f3fd.mp4 +2025-08-21 00:03:02 - INFO - [84e77116-c87b-4b39-8001-7954bd84f3fd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:03:06 - INFO - [84e77116-c87b-4b39-8001-7954bd84f3fd] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:03:06 - INFO - [84e77116-c87b-4b39-8001-7954bd84f3fd] 30 frames saved to temp_videos/84e77116-c87b-4b39-8001-7954bd84f3fd +2025-08-21 00:03:19 - INFO - vision_config is None, using default vision config +2025-08-21 00:03:30 - INFO - Tokens per second: 5.540220507070647, Peak GPU memory MB: 11824.375 +2025-08-21 00:03:30 - INFO - [84e77116-c87b-4b39-8001-7954bd84f3fd] Inference time: 28.94 seconds, CPU usage: 37.0%, CPU core utilization: [33.0, 54.7, 43.2, 17.3] +2025-08-21 00:03:30 - INFO - [84e77116-c87b-4b39-8001-7954bd84f3fd] Cleaned up temporary frame directory: temp_videos/84e77116-c87b-4b39-8001-7954bd84f3fd +2025-08-21 00:03:30 - INFO - [988d9b75-c51b-4171-9ced-1e7ec41af950] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_020.mp4' +2025-08-21 00:03:30 - INFO - [988d9b75-c51b-4171-9ced-1e7ec41af950] Video saved to temporary file: temp_videos/988d9b75-c51b-4171-9ced-1e7ec41af950.mp4 +2025-08-21 00:03:30 - INFO - [988d9b75-c51b-4171-9ced-1e7ec41af950] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:03:35 - INFO - [988d9b75-c51b-4171-9ced-1e7ec41af950] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:03:35 - INFO - [988d9b75-c51b-4171-9ced-1e7ec41af950] 30 frames saved to temp_videos/988d9b75-c51b-4171-9ced-1e7ec41af950 +2025-08-21 00:03:48 - INFO - vision_config is None, using default vision config +2025-08-21 00:04:00 - INFO - Tokens per second: 6.098932177607684, Peak GPU memory MB: 11824.375 +2025-08-21 00:04:00 - INFO - [988d9b75-c51b-4171-9ced-1e7ec41af950] Inference time: 29.66 seconds, CPU usage: 36.9%, CPU core utilization: [19.0, 31.2, 54.7, 42.7] +2025-08-21 00:04:00 - INFO - [988d9b75-c51b-4171-9ced-1e7ec41af950] Cleaned up temporary frame directory: temp_videos/988d9b75-c51b-4171-9ced-1e7ec41af950 +2025-08-21 00:04:00 - INFO - [ad9e17b9-f6e0-44d4-a882-cc7bde714ced] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_021.mp4' +2025-08-21 00:04:00 - INFO - [ad9e17b9-f6e0-44d4-a882-cc7bde714ced] Video saved to temporary file: temp_videos/ad9e17b9-f6e0-44d4-a882-cc7bde714ced.mp4 +2025-08-21 00:04:00 - INFO - [ad9e17b9-f6e0-44d4-a882-cc7bde714ced] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:04:05 - INFO - [ad9e17b9-f6e0-44d4-a882-cc7bde714ced] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:04:05 - INFO - [ad9e17b9-f6e0-44d4-a882-cc7bde714ced] 30 frames saved to temp_videos/ad9e17b9-f6e0-44d4-a882-cc7bde714ced +2025-08-21 00:04:18 - INFO - vision_config is None, using default vision config +2025-08-21 00:04:29 - INFO - Tokens per second: 5.256702476105206, Peak GPU memory MB: 11824.375 +2025-08-21 00:04:29 - INFO - [ad9e17b9-f6e0-44d4-a882-cc7bde714ced] Inference time: 28.58 seconds, CPU usage: 37.3%, CPU core utilization: [41.7, 37.5, 52.9, 17.2] +2025-08-21 00:04:29 - INFO - [ad9e17b9-f6e0-44d4-a882-cc7bde714ced] Cleaned up temporary frame directory: temp_videos/ad9e17b9-f6e0-44d4-a882-cc7bde714ced +2025-08-21 00:04:29 - INFO - [55b659d8-42d1-481b-b2f4-d9ac78a772cd] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_022.mp4' +2025-08-21 00:04:29 - INFO - [55b659d8-42d1-481b-b2f4-d9ac78a772cd] Video saved to temporary file: temp_videos/55b659d8-42d1-481b-b2f4-d9ac78a772cd.mp4 +2025-08-21 00:04:29 - INFO - [55b659d8-42d1-481b-b2f4-d9ac78a772cd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:04:33 - INFO - [55b659d8-42d1-481b-b2f4-d9ac78a772cd] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:04:33 - INFO - [55b659d8-42d1-481b-b2f4-d9ac78a772cd] 30 frames saved to temp_videos/55b659d8-42d1-481b-b2f4-d9ac78a772cd +2025-08-21 00:04:46 - INFO - vision_config is None, using default vision config +2025-08-21 00:04:55 - INFO - Tokens per second: 2.3146754530784004, Peak GPU memory MB: 11824.375 +2025-08-21 00:04:55 - INFO - [55b659d8-42d1-481b-b2f4-d9ac78a772cd] Inference time: 25.85 seconds, CPU usage: 38.1%, CPU core utilization: [69.0, 33.5, 30.6, 19.4] +2025-08-21 00:04:55 - INFO - [55b659d8-42d1-481b-b2f4-d9ac78a772cd] Cleaned up temporary frame directory: temp_videos/55b659d8-42d1-481b-b2f4-d9ac78a772cd +2025-08-21 00:04:55 - INFO - [e8440f1e-8228-4173-8d4e-75e3bd0655f0] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_023.mp4' +2025-08-21 00:04:55 - INFO - [e8440f1e-8228-4173-8d4e-75e3bd0655f0] Video saved to temporary file: temp_videos/e8440f1e-8228-4173-8d4e-75e3bd0655f0.mp4 +2025-08-21 00:04:55 - INFO - [e8440f1e-8228-4173-8d4e-75e3bd0655f0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:04:59 - INFO - [e8440f1e-8228-4173-8d4e-75e3bd0655f0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:04:59 - INFO - [e8440f1e-8228-4173-8d4e-75e3bd0655f0] 30 frames saved to temp_videos/e8440f1e-8228-4173-8d4e-75e3bd0655f0 +2025-08-21 00:05:12 - INFO - vision_config is None, using default vision config +2025-08-21 00:05:23 - INFO - Tokens per second: 4.957384579566463, Peak GPU memory MB: 11824.375 +2025-08-21 00:05:23 - INFO - [e8440f1e-8228-4173-8d4e-75e3bd0655f0] Inference time: 28.24 seconds, CPU usage: 37.5%, CPU core utilization: [33.0, 50.8, 47.3, 18.9] +2025-08-21 00:05:23 - INFO - [e8440f1e-8228-4173-8d4e-75e3bd0655f0] Cleaned up temporary frame directory: temp_videos/e8440f1e-8228-4173-8d4e-75e3bd0655f0 +2025-08-21 00:05:23 - INFO - [1f996954-f0cf-4ed2-813a-0fe738be6d01] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_024.mp4' +2025-08-21 00:05:23 - INFO - [1f996954-f0cf-4ed2-813a-0fe738be6d01] Video saved to temporary file: temp_videos/1f996954-f0cf-4ed2-813a-0fe738be6d01.mp4 +2025-08-21 00:05:23 - INFO - [1f996954-f0cf-4ed2-813a-0fe738be6d01] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:05:28 - INFO - [1f996954-f0cf-4ed2-813a-0fe738be6d01] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:05:28 - INFO - [1f996954-f0cf-4ed2-813a-0fe738be6d01] 30 frames saved to temp_videos/1f996954-f0cf-4ed2-813a-0fe738be6d01 +2025-08-21 00:05:41 - INFO - vision_config is None, using default vision config +2025-08-21 00:05:55 - INFO - Tokens per second: 7.516056826941947, Peak GPU memory MB: 11824.375 +2025-08-21 00:05:55 - INFO - [1f996954-f0cf-4ed2-813a-0fe738be6d01] Inference time: 32.20 seconds, CPU usage: 35.6%, CPU core utilization: [33.4, 36.1, 32.6, 40.1] +2025-08-21 00:05:55 - INFO - [1f996954-f0cf-4ed2-813a-0fe738be6d01] Cleaned up temporary frame directory: temp_videos/1f996954-f0cf-4ed2-813a-0fe738be6d01 +2025-08-21 00:05:55 - INFO - [0750de05-37d6-46f0-bd2c-9e1e5a7140f8] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_025.mp4' +2025-08-21 00:05:55 - INFO - [0750de05-37d6-46f0-bd2c-9e1e5a7140f8] Video saved to temporary file: temp_videos/0750de05-37d6-46f0-bd2c-9e1e5a7140f8.mp4 +2025-08-21 00:05:55 - INFO - [0750de05-37d6-46f0-bd2c-9e1e5a7140f8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:06:00 - INFO - [0750de05-37d6-46f0-bd2c-9e1e5a7140f8] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:06:00 - INFO - [0750de05-37d6-46f0-bd2c-9e1e5a7140f8] 30 frames saved to temp_videos/0750de05-37d6-46f0-bd2c-9e1e5a7140f8 +2025-08-21 00:06:13 - INFO - vision_config is None, using default vision config +2025-08-21 00:06:23 - INFO - Tokens per second: 4.695294352922656, Peak GPU memory MB: 11824.375 +2025-08-21 00:06:23 - INFO - [0750de05-37d6-46f0-bd2c-9e1e5a7140f8] Inference time: 27.94 seconds, CPU usage: 37.5%, CPU core utilization: [52.4, 21.4, 57.3, 18.7] +2025-08-21 00:06:23 - INFO - [0750de05-37d6-46f0-bd2c-9e1e5a7140f8] Cleaned up temporary frame directory: temp_videos/0750de05-37d6-46f0-bd2c-9e1e5a7140f8 +2025-08-21 00:06:23 - INFO - [01029caa-6289-4c40-9cbc-a4ae5a937946] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_026.mp4' +2025-08-21 00:06:23 - INFO - [01029caa-6289-4c40-9cbc-a4ae5a937946] Video saved to temporary file: temp_videos/01029caa-6289-4c40-9cbc-a4ae5a937946.mp4 +2025-08-21 00:06:23 - INFO - [01029caa-6289-4c40-9cbc-a4ae5a937946] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:06:28 - INFO - [01029caa-6289-4c40-9cbc-a4ae5a937946] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:06:28 - INFO - [01029caa-6289-4c40-9cbc-a4ae5a937946] 30 frames saved to temp_videos/01029caa-6289-4c40-9cbc-a4ae5a937946 +2025-08-21 00:06:41 - INFO - vision_config is None, using default vision config +2025-08-21 00:06:54 - INFO - Tokens per second: 6.726536746978091, Peak GPU memory MB: 11824.375 +2025-08-21 00:06:54 - INFO - [01029caa-6289-4c40-9cbc-a4ae5a937946] Inference time: 30.64 seconds, CPU usage: 36.4%, CPU core utilization: [24.0, 59.4, 17.0, 45.3] +2025-08-21 00:06:54 - INFO - [01029caa-6289-4c40-9cbc-a4ae5a937946] Cleaned up temporary frame directory: temp_videos/01029caa-6289-4c40-9cbc-a4ae5a937946 +2025-08-21 00:06:54 - INFO - [f210db90-ebcd-4bb7-bbf4-c28a7a20457b] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_027.mp4' +2025-08-21 00:06:54 - INFO - [f210db90-ebcd-4bb7-bbf4-c28a7a20457b] Video saved to temporary file: temp_videos/f210db90-ebcd-4bb7-bbf4-c28a7a20457b.mp4 +2025-08-21 00:06:54 - INFO - [f210db90-ebcd-4bb7-bbf4-c28a7a20457b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:06:58 - INFO - [f210db90-ebcd-4bb7-bbf4-c28a7a20457b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:06:58 - INFO - [f210db90-ebcd-4bb7-bbf4-c28a7a20457b] 30 frames saved to temp_videos/f210db90-ebcd-4bb7-bbf4-c28a7a20457b +2025-08-21 00:07:11 - INFO - vision_config is None, using default vision config +2025-08-21 00:07:24 - INFO - Tokens per second: 6.553781610815721, Peak GPU memory MB: 11824.375 +2025-08-21 00:07:24 - INFO - [f210db90-ebcd-4bb7-bbf4-c28a7a20457b] Inference time: 30.34 seconds, CPU usage: 36.5%, CPU core utilization: [32.0, 56.7, 38.4, 18.8] +2025-08-21 00:07:24 - INFO - [f210db90-ebcd-4bb7-bbf4-c28a7a20457b] Cleaned up temporary frame directory: temp_videos/f210db90-ebcd-4bb7-bbf4-c28a7a20457b +2025-08-21 00:07:24 - INFO - [c32e0c9f-bf3d-4cb5-a0da-af1e3e029c5c] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_028.mp4' +2025-08-21 00:07:24 - INFO - [c32e0c9f-bf3d-4cb5-a0da-af1e3e029c5c] Video saved to temporary file: temp_videos/c32e0c9f-bf3d-4cb5-a0da-af1e3e029c5c.mp4 +2025-08-21 00:07:24 - INFO - [c32e0c9f-bf3d-4cb5-a0da-af1e3e029c5c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:07:29 - INFO - [c32e0c9f-bf3d-4cb5-a0da-af1e3e029c5c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:07:29 - INFO - [c32e0c9f-bf3d-4cb5-a0da-af1e3e029c5c] 30 frames saved to temp_videos/c32e0c9f-bf3d-4cb5-a0da-af1e3e029c5c +2025-08-21 00:07:42 - INFO - vision_config is None, using default vision config +2025-08-21 00:07:55 - INFO - Tokens per second: 6.759111802459959, Peak GPU memory MB: 11824.375 +2025-08-21 00:07:55 - INFO - [c32e0c9f-bf3d-4cb5-a0da-af1e3e029c5c] Inference time: 30.70 seconds, CPU usage: 36.3%, CPU core utilization: [31.2, 20.1, 76.9, 17.1] +2025-08-21 00:07:55 - INFO - [c32e0c9f-bf3d-4cb5-a0da-af1e3e029c5c] Cleaned up temporary frame directory: temp_videos/c32e0c9f-bf3d-4cb5-a0da-af1e3e029c5c +2025-08-21 00:07:55 - INFO - [9ca4ad09-6652-4495-9660-8e536730d426] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_029.mp4' +2025-08-21 00:07:55 - INFO - [9ca4ad09-6652-4495-9660-8e536730d426] Video saved to temporary file: temp_videos/9ca4ad09-6652-4495-9660-8e536730d426.mp4 +2025-08-21 00:07:55 - INFO - [9ca4ad09-6652-4495-9660-8e536730d426] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:08:00 - INFO - [9ca4ad09-6652-4495-9660-8e536730d426] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:08:00 - INFO - [9ca4ad09-6652-4495-9660-8e536730d426] 30 frames saved to temp_videos/9ca4ad09-6652-4495-9660-8e536730d426 +2025-08-21 00:08:13 - INFO - vision_config is None, using default vision config +2025-08-21 00:08:32 - INFO - Tokens per second: 9.263214649215994, Peak GPU memory MB: 11824.375 +2025-08-21 00:08:32 - INFO - [9ca4ad09-6652-4495-9660-8e536730d426] Inference time: 37.69 seconds, CPU usage: 34.5%, CPU core utilization: [53.9, 19.4, 24.3, 40.4] +2025-08-21 00:08:32 - INFO - [9ca4ad09-6652-4495-9660-8e536730d426] Cleaned up temporary frame directory: temp_videos/9ca4ad09-6652-4495-9660-8e536730d426 +2025-08-21 00:08:32 - INFO - [df092fec-bdf9-43d3-bdc0-e3addd960939] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_030.mp4' +2025-08-21 00:08:32 - INFO - [df092fec-bdf9-43d3-bdc0-e3addd960939] Video saved to temporary file: temp_videos/df092fec-bdf9-43d3-bdc0-e3addd960939.mp4 +2025-08-21 00:08:32 - INFO - [df092fec-bdf9-43d3-bdc0-e3addd960939] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:08:37 - INFO - [df092fec-bdf9-43d3-bdc0-e3addd960939] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:08:37 - INFO - [df092fec-bdf9-43d3-bdc0-e3addd960939] 30 frames saved to temp_videos/df092fec-bdf9-43d3-bdc0-e3addd960939 +2025-08-21 00:08:50 - INFO - vision_config is None, using default vision config +2025-08-21 00:09:00 - INFO - Tokens per second: 4.063290188399236, Peak GPU memory MB: 11824.375 +2025-08-21 00:09:00 - INFO - [df092fec-bdf9-43d3-bdc0-e3addd960939] Inference time: 27.34 seconds, CPU usage: 37.5%, CPU core utilization: [50.1, 20.2, 61.0, 18.7] +2025-08-21 00:09:00 - INFO - [df092fec-bdf9-43d3-bdc0-e3addd960939] Cleaned up temporary frame directory: temp_videos/df092fec-bdf9-43d3-bdc0-e3addd960939 +2025-08-21 00:09:00 - INFO - [ca418df5-894b-4902-ba9d-0e23d1dd86ab] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_031.mp4' +2025-08-21 00:09:00 - INFO - [ca418df5-894b-4902-ba9d-0e23d1dd86ab] Video saved to temporary file: temp_videos/ca418df5-894b-4902-ba9d-0e23d1dd86ab.mp4 +2025-08-21 00:09:00 - INFO - [ca418df5-894b-4902-ba9d-0e23d1dd86ab] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:09:05 - INFO - [ca418df5-894b-4902-ba9d-0e23d1dd86ab] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:09:05 - INFO - [ca418df5-894b-4902-ba9d-0e23d1dd86ab] 30 frames saved to temp_videos/ca418df5-894b-4902-ba9d-0e23d1dd86ab +2025-08-21 00:09:17 - INFO - vision_config is None, using default vision config +2025-08-21 00:09:31 - INFO - Tokens per second: 7.1731396320969125, Peak GPU memory MB: 11824.375 +2025-08-21 00:09:31 - INFO - [ca418df5-894b-4902-ba9d-0e23d1dd86ab] Inference time: 31.49 seconds, CPU usage: 36.2%, CPU core utilization: [35.7, 20.6, 19.6, 68.6] +2025-08-21 00:09:31 - INFO - [ca418df5-894b-4902-ba9d-0e23d1dd86ab] Cleaned up temporary frame directory: temp_videos/ca418df5-894b-4902-ba9d-0e23d1dd86ab +2025-08-21 00:09:31 - INFO - [e88d0f9b-c698-414c-858d-003fae28cdf3] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_032.mp4' +2025-08-21 00:09:31 - INFO - [e88d0f9b-c698-414c-858d-003fae28cdf3] Video saved to temporary file: temp_videos/e88d0f9b-c698-414c-858d-003fae28cdf3.mp4 +2025-08-21 00:09:31 - INFO - [e88d0f9b-c698-414c-858d-003fae28cdf3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:09:36 - INFO - [e88d0f9b-c698-414c-858d-003fae28cdf3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:09:36 - INFO - [e88d0f9b-c698-414c-858d-003fae28cdf3] 30 frames saved to temp_videos/e88d0f9b-c698-414c-858d-003fae28cdf3 +2025-08-21 00:09:49 - INFO - vision_config is None, using default vision config +2025-08-21 00:10:02 - INFO - Tokens per second: 6.459775499170554, Peak GPU memory MB: 11824.375 +2025-08-21 00:10:02 - INFO - [e88d0f9b-c698-414c-858d-003fae28cdf3] Inference time: 30.22 seconds, CPU usage: 36.5%, CPU core utilization: [40.0, 21.9, 46.6, 37.5] +2025-08-21 00:10:02 - INFO - [e88d0f9b-c698-414c-858d-003fae28cdf3] Cleaned up temporary frame directory: temp_videos/e88d0f9b-c698-414c-858d-003fae28cdf3 +2025-08-21 00:10:02 - INFO - [53a4ed5c-13d3-4b71-b5f6-ce4d784c05ae] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_033.mp4' +2025-08-21 00:10:02 - INFO - [53a4ed5c-13d3-4b71-b5f6-ce4d784c05ae] Video saved to temporary file: temp_videos/53a4ed5c-13d3-4b71-b5f6-ce4d784c05ae.mp4 +2025-08-21 00:10:02 - INFO - [53a4ed5c-13d3-4b71-b5f6-ce4d784c05ae] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:10:06 - INFO - [53a4ed5c-13d3-4b71-b5f6-ce4d784c05ae] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:10:06 - INFO - [53a4ed5c-13d3-4b71-b5f6-ce4d784c05ae] 30 frames saved to temp_videos/53a4ed5c-13d3-4b71-b5f6-ce4d784c05ae +2025-08-21 00:10:19 - INFO - vision_config is None, using default vision config +2025-08-21 00:10:37 - INFO - Tokens per second: 8.532561247112476, Peak GPU memory MB: 11824.375 +2025-08-21 00:10:37 - INFO - [53a4ed5c-13d3-4b71-b5f6-ce4d784c05ae] Inference time: 34.97 seconds, CPU usage: 35.2%, CPU core utilization: [16.2, 20.7, 47.4, 56.6] +2025-08-21 00:10:37 - INFO - [53a4ed5c-13d3-4b71-b5f6-ce4d784c05ae] Cleaned up temporary frame directory: temp_videos/53a4ed5c-13d3-4b71-b5f6-ce4d784c05ae +2025-08-21 00:10:37 - INFO - [ccd7f7c1-6c22-4ee5-876a-1c19b01e0607] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_034.mp4' +2025-08-21 00:10:37 - INFO - [ccd7f7c1-6c22-4ee5-876a-1c19b01e0607] Video saved to temporary file: temp_videos/ccd7f7c1-6c22-4ee5-876a-1c19b01e0607.mp4 +2025-08-21 00:10:37 - INFO - [ccd7f7c1-6c22-4ee5-876a-1c19b01e0607] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:10:41 - INFO - [ccd7f7c1-6c22-4ee5-876a-1c19b01e0607] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:10:41 - INFO - [ccd7f7c1-6c22-4ee5-876a-1c19b01e0607] 30 frames saved to temp_videos/ccd7f7c1-6c22-4ee5-876a-1c19b01e0607 +2025-08-21 00:10:54 - INFO - vision_config is None, using default vision config +2025-08-21 00:11:06 - INFO - Tokens per second: 6.093386425880963, Peak GPU memory MB: 11824.375 +2025-08-21 00:11:06 - INFO - [ccd7f7c1-6c22-4ee5-876a-1c19b01e0607] Inference time: 29.67 seconds, CPU usage: 36.6%, CPU core utilization: [47.7, 65.3, 17.1, 16.5] +2025-08-21 00:11:06 - INFO - [ccd7f7c1-6c22-4ee5-876a-1c19b01e0607] Cleaned up temporary frame directory: temp_videos/ccd7f7c1-6c22-4ee5-876a-1c19b01e0607 +2025-08-21 00:11:06 - INFO - [496ad230-f9e4-468b-9097-aae0d083dd17] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_035.mp4' +2025-08-21 00:11:06 - INFO - [496ad230-f9e4-468b-9097-aae0d083dd17] Video saved to temporary file: temp_videos/496ad230-f9e4-468b-9097-aae0d083dd17.mp4 +2025-08-21 00:11:06 - INFO - [496ad230-f9e4-468b-9097-aae0d083dd17] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:11:11 - INFO - [496ad230-f9e4-468b-9097-aae0d083dd17] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:11:11 - INFO - [496ad230-f9e4-468b-9097-aae0d083dd17] 30 frames saved to temp_videos/496ad230-f9e4-468b-9097-aae0d083dd17 +2025-08-21 00:11:24 - INFO - vision_config is None, using default vision config +2025-08-21 00:11:39 - INFO - Tokens per second: 7.846984662180754, Peak GPU memory MB: 11824.375 +2025-08-21 00:11:39 - INFO - [496ad230-f9e4-468b-9097-aae0d083dd17] Inference time: 33.09 seconds, CPU usage: 35.7%, CPU core utilization: [14.8, 15.5, 59.9, 52.6] +2025-08-21 00:11:39 - INFO - [496ad230-f9e4-468b-9097-aae0d083dd17] Cleaned up temporary frame directory: temp_videos/496ad230-f9e4-468b-9097-aae0d083dd17 +2025-08-21 00:11:39 - INFO - [124ab1d5-65f5-4f6f-8641-57c4c85808d4] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_036.mp4' +2025-08-21 00:11:39 - INFO - [124ab1d5-65f5-4f6f-8641-57c4c85808d4] Video saved to temporary file: temp_videos/124ab1d5-65f5-4f6f-8641-57c4c85808d4.mp4 +2025-08-21 00:11:39 - INFO - [124ab1d5-65f5-4f6f-8641-57c4c85808d4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:11:44 - INFO - [124ab1d5-65f5-4f6f-8641-57c4c85808d4] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:11:44 - INFO - [124ab1d5-65f5-4f6f-8641-57c4c85808d4] 30 frames saved to temp_videos/124ab1d5-65f5-4f6f-8641-57c4c85808d4 +2025-08-21 00:11:57 - INFO - vision_config is None, using default vision config +2025-08-21 00:12:07 - INFO - Tokens per second: 4.356472577241336, Peak GPU memory MB: 11824.375 +2025-08-21 00:12:07 - INFO - [124ab1d5-65f5-4f6f-8641-57c4c85808d4] Inference time: 27.59 seconds, CPU usage: 37.4%, CPU core utilization: [20.9, 70.7, 18.3, 40.0] +2025-08-21 00:12:07 - INFO - [124ab1d5-65f5-4f6f-8641-57c4c85808d4] Cleaned up temporary frame directory: temp_videos/124ab1d5-65f5-4f6f-8641-57c4c85808d4 +2025-08-21 00:12:07 - INFO - [e52f096d-c451-4930-83cf-bb8210f55a92] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_037.mp4' +2025-08-21 00:12:07 - INFO - [e52f096d-c451-4930-83cf-bb8210f55a92] Video saved to temporary file: temp_videos/e52f096d-c451-4930-83cf-bb8210f55a92.mp4 +2025-08-21 00:12:07 - INFO - [e52f096d-c451-4930-83cf-bb8210f55a92] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:12:12 - INFO - [e52f096d-c451-4930-83cf-bb8210f55a92] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:12:12 - INFO - [e52f096d-c451-4930-83cf-bb8210f55a92] 30 frames saved to temp_videos/e52f096d-c451-4930-83cf-bb8210f55a92 +2025-08-21 00:12:25 - INFO - vision_config is None, using default vision config +2025-08-21 00:12:35 - INFO - Tokens per second: 5.138801059166309, Peak GPU memory MB: 11824.375 +2025-08-21 00:12:35 - INFO - [e52f096d-c451-4930-83cf-bb8210f55a92] Inference time: 28.53 seconds, CPU usage: 37.3%, CPU core utilization: [47.6, 38.0, 17.4, 46.2] +2025-08-21 00:12:35 - INFO - [e52f096d-c451-4930-83cf-bb8210f55a92] Cleaned up temporary frame directory: temp_videos/e52f096d-c451-4930-83cf-bb8210f55a92 +2025-08-21 00:12:35 - INFO - [00c8bc41-e1e0-4ab7-8072-cd4bc78fe18a] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_038.mp4' +2025-08-21 00:12:35 - INFO - [00c8bc41-e1e0-4ab7-8072-cd4bc78fe18a] Video saved to temporary file: temp_videos/00c8bc41-e1e0-4ab7-8072-cd4bc78fe18a.mp4 +2025-08-21 00:12:35 - INFO - [00c8bc41-e1e0-4ab7-8072-cd4bc78fe18a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:12:40 - INFO - [00c8bc41-e1e0-4ab7-8072-cd4bc78fe18a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:12:40 - INFO - [00c8bc41-e1e0-4ab7-8072-cd4bc78fe18a] 30 frames saved to temp_videos/00c8bc41-e1e0-4ab7-8072-cd4bc78fe18a +2025-08-21 00:12:53 - INFO - vision_config is None, using default vision config +2025-08-21 00:13:02 - INFO - Tokens per second: 3.425754264592105, Peak GPU memory MB: 11824.375 +2025-08-21 00:13:02 - INFO - [00c8bc41-e1e0-4ab7-8072-cd4bc78fe18a] Inference time: 26.76 seconds, CPU usage: 37.7%, CPU core utilization: [23.2, 51.9, 42.6, 33.3] +2025-08-21 00:13:02 - INFO - [00c8bc41-e1e0-4ab7-8072-cd4bc78fe18a] Cleaned up temporary frame directory: temp_videos/00c8bc41-e1e0-4ab7-8072-cd4bc78fe18a +2025-08-21 00:13:02 - INFO - [a840eac1-a5b6-435f-aceb-589aa77afa45] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_039.mp4' +2025-08-21 00:13:02 - INFO - [a840eac1-a5b6-435f-aceb-589aa77afa45] Video saved to temporary file: temp_videos/a840eac1-a5b6-435f-aceb-589aa77afa45.mp4 +2025-08-21 00:13:02 - INFO - [a840eac1-a5b6-435f-aceb-589aa77afa45] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:13:07 - INFO - [a840eac1-a5b6-435f-aceb-589aa77afa45] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:13:07 - INFO - [a840eac1-a5b6-435f-aceb-589aa77afa45] 30 frames saved to temp_videos/a840eac1-a5b6-435f-aceb-589aa77afa45 +2025-08-21 00:13:20 - INFO - vision_config is None, using default vision config +2025-08-21 00:13:30 - INFO - Tokens per second: 4.69602386319334, Peak GPU memory MB: 11824.375 +2025-08-21 00:13:30 - INFO - [a840eac1-a5b6-435f-aceb-589aa77afa45] Inference time: 27.88 seconds, CPU usage: 37.3%, CPU core utilization: [51.3, 17.6, 19.3, 60.6] +2025-08-21 00:13:30 - INFO - [a840eac1-a5b6-435f-aceb-589aa77afa45] Cleaned up temporary frame directory: temp_videos/a840eac1-a5b6-435f-aceb-589aa77afa45 +2025-08-21 00:13:30 - INFO - [79fff7f5-155c-4aab-a7fd-d432cf110dde] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_040.mp4' +2025-08-21 00:13:30 - INFO - [79fff7f5-155c-4aab-a7fd-d432cf110dde] Video saved to temporary file: temp_videos/79fff7f5-155c-4aab-a7fd-d432cf110dde.mp4 +2025-08-21 00:13:30 - INFO - [79fff7f5-155c-4aab-a7fd-d432cf110dde] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:13:35 - INFO - [79fff7f5-155c-4aab-a7fd-d432cf110dde] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:13:35 - INFO - [79fff7f5-155c-4aab-a7fd-d432cf110dde] 30 frames saved to temp_videos/79fff7f5-155c-4aab-a7fd-d432cf110dde +2025-08-21 00:13:48 - INFO - vision_config is None, using default vision config +2025-08-21 00:14:06 - INFO - Tokens per second: 8.923322929883941, Peak GPU memory MB: 11824.375 +2025-08-21 00:14:06 - INFO - [79fff7f5-155c-4aab-a7fd-d432cf110dde] Inference time: 36.23 seconds, CPU usage: 34.9%, CPU core utilization: [26.5, 17.6, 53.3, 42.2] +2025-08-21 00:14:06 - INFO - [79fff7f5-155c-4aab-a7fd-d432cf110dde] Cleaned up temporary frame directory: temp_videos/79fff7f5-155c-4aab-a7fd-d432cf110dde +2025-08-21 00:14:06 - INFO - [5a62181d-1328-4203-92fb-95497b40e1cf] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_041.mp4' +2025-08-21 00:14:06 - INFO - [5a62181d-1328-4203-92fb-95497b40e1cf] Video saved to temporary file: temp_videos/5a62181d-1328-4203-92fb-95497b40e1cf.mp4 +2025-08-21 00:14:06 - INFO - [5a62181d-1328-4203-92fb-95497b40e1cf] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:14:11 - INFO - [5a62181d-1328-4203-92fb-95497b40e1cf] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:14:11 - INFO - [5a62181d-1328-4203-92fb-95497b40e1cf] 30 frames saved to temp_videos/5a62181d-1328-4203-92fb-95497b40e1cf +2025-08-21 00:14:24 - INFO - vision_config is None, using default vision config +2025-08-21 00:14:37 - INFO - Tokens per second: 6.836469983622892, Peak GPU memory MB: 11824.375 +2025-08-21 00:14:37 - INFO - [5a62181d-1328-4203-92fb-95497b40e1cf] Inference time: 31.03 seconds, CPU usage: 36.3%, CPU core utilization: [30.2, 49.8, 43.0, 22.1] +2025-08-21 00:14:37 - INFO - [5a62181d-1328-4203-92fb-95497b40e1cf] Cleaned up temporary frame directory: temp_videos/5a62181d-1328-4203-92fb-95497b40e1cf +2025-08-21 00:14:37 - INFO - [b8f2e591-45db-423c-9af0-ed74004baae0] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_042.mp4' +2025-08-21 00:14:37 - INFO - [b8f2e591-45db-423c-9af0-ed74004baae0] Video saved to temporary file: temp_videos/b8f2e591-45db-423c-9af0-ed74004baae0.mp4 +2025-08-21 00:14:37 - INFO - [b8f2e591-45db-423c-9af0-ed74004baae0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:14:42 - INFO - [b8f2e591-45db-423c-9af0-ed74004baae0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:14:42 - INFO - [b8f2e591-45db-423c-9af0-ed74004baae0] 30 frames saved to temp_videos/b8f2e591-45db-423c-9af0-ed74004baae0 +2025-08-21 00:14:55 - INFO - vision_config is None, using default vision config +2025-08-21 00:15:07 - INFO - Tokens per second: 5.799046418435553, Peak GPU memory MB: 11824.375 +2025-08-21 00:15:07 - INFO - [b8f2e591-45db-423c-9af0-ed74004baae0] Inference time: 29.23 seconds, CPU usage: 36.6%, CPU core utilization: [52.7, 42.9, 16.9, 34.0] +2025-08-21 00:15:07 - INFO - [b8f2e591-45db-423c-9af0-ed74004baae0] Cleaned up temporary frame directory: temp_videos/b8f2e591-45db-423c-9af0-ed74004baae0 +2025-08-21 00:15:07 - INFO - [d32f2792-3f75-4bb2-803e-e3d8f5ab1a76] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_043.mp4' +2025-08-21 00:15:07 - INFO - [d32f2792-3f75-4bb2-803e-e3d8f5ab1a76] Video saved to temporary file: temp_videos/d32f2792-3f75-4bb2-803e-e3d8f5ab1a76.mp4 +2025-08-21 00:15:07 - INFO - [d32f2792-3f75-4bb2-803e-e3d8f5ab1a76] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:15:12 - INFO - [d32f2792-3f75-4bb2-803e-e3d8f5ab1a76] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:15:12 - INFO - [d32f2792-3f75-4bb2-803e-e3d8f5ab1a76] 30 frames saved to temp_videos/d32f2792-3f75-4bb2-803e-e3d8f5ab1a76 +2025-08-21 00:15:25 - INFO - vision_config is None, using default vision config +2025-08-21 00:15:40 - INFO - Tokens per second: 7.766284842271324, Peak GPU memory MB: 11824.375 +2025-08-21 00:15:40 - INFO - [d32f2792-3f75-4bb2-803e-e3d8f5ab1a76] Inference time: 32.92 seconds, CPU usage: 35.7%, CPU core utilization: [16.6, 43.3, 58.1, 24.8] +2025-08-21 00:15:40 - INFO - [d32f2792-3f75-4bb2-803e-e3d8f5ab1a76] Cleaned up temporary frame directory: temp_videos/d32f2792-3f75-4bb2-803e-e3d8f5ab1a76 +2025-08-21 00:15:40 - INFO - [978e0423-475a-455d-aa66-ca499da86744] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_044.mp4' +2025-08-21 00:15:40 - INFO - [978e0423-475a-455d-aa66-ca499da86744] Video saved to temporary file: temp_videos/978e0423-475a-455d-aa66-ca499da86744.mp4 +2025-08-21 00:15:40 - INFO - [978e0423-475a-455d-aa66-ca499da86744] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:15:44 - INFO - [978e0423-475a-455d-aa66-ca499da86744] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:15:44 - INFO - [978e0423-475a-455d-aa66-ca499da86744] 30 frames saved to temp_videos/978e0423-475a-455d-aa66-ca499da86744 +2025-08-21 00:15:57 - INFO - vision_config is None, using default vision config +2025-08-21 00:16:07 - INFO - Tokens per second: 4.5631272946545245, Peak GPU memory MB: 11824.375 +2025-08-21 00:16:07 - INFO - [978e0423-475a-455d-aa66-ca499da86744] Inference time: 27.84 seconds, CPU usage: 37.3%, CPU core utilization: [25.7, 32.6, 18.2, 72.5] +2025-08-21 00:16:07 - INFO - [978e0423-475a-455d-aa66-ca499da86744] Cleaned up temporary frame directory: temp_videos/978e0423-475a-455d-aa66-ca499da86744 +2025-08-21 00:16:07 - INFO - [e4056e7f-abce-447f-9556-fc5a853b32aa] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_045.mp4' +2025-08-21 00:16:07 - INFO - [e4056e7f-abce-447f-9556-fc5a853b32aa] Video saved to temporary file: temp_videos/e4056e7f-abce-447f-9556-fc5a853b32aa.mp4 +2025-08-21 00:16:07 - INFO - [e4056e7f-abce-447f-9556-fc5a853b32aa] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:16:12 - INFO - [e4056e7f-abce-447f-9556-fc5a853b32aa] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:16:12 - INFO - [e4056e7f-abce-447f-9556-fc5a853b32aa] 30 frames saved to temp_videos/e4056e7f-abce-447f-9556-fc5a853b32aa +2025-08-21 00:16:25 - INFO - vision_config is None, using default vision config +2025-08-21 00:16:39 - INFO - Tokens per second: 7.282653773730579, Peak GPU memory MB: 11824.375 +2025-08-21 00:16:39 - INFO - [e4056e7f-abce-447f-9556-fc5a853b32aa] Inference time: 31.87 seconds, CPU usage: 36.1%, CPU core utilization: [40.4, 17.4, 32.9, 53.5] +2025-08-21 00:16:39 - INFO - [e4056e7f-abce-447f-9556-fc5a853b32aa] Cleaned up temporary frame directory: temp_videos/e4056e7f-abce-447f-9556-fc5a853b32aa +2025-08-21 00:16:39 - INFO - [5d08d2de-911a-498a-9b3a-f89597376f02] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_046.mp4' +2025-08-21 00:16:39 - INFO - [5d08d2de-911a-498a-9b3a-f89597376f02] Video saved to temporary file: temp_videos/5d08d2de-911a-498a-9b3a-f89597376f02.mp4 +2025-08-21 00:16:39 - INFO - [5d08d2de-911a-498a-9b3a-f89597376f02] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:16:44 - INFO - [5d08d2de-911a-498a-9b3a-f89597376f02] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:16:44 - INFO - [5d08d2de-911a-498a-9b3a-f89597376f02] 30 frames saved to temp_videos/5d08d2de-911a-498a-9b3a-f89597376f02 +2025-08-21 00:16:57 - INFO - vision_config is None, using default vision config +2025-08-21 00:17:11 - INFO - Tokens per second: 7.413608779975096, Peak GPU memory MB: 11824.375 +2025-08-21 00:17:11 - INFO - [5d08d2de-911a-498a-9b3a-f89597376f02] Inference time: 32.04 seconds, CPU usage: 36.2%, CPU core utilization: [15.1, 15.9, 55.1, 58.5] +2025-08-21 00:17:11 - INFO - [5d08d2de-911a-498a-9b3a-f89597376f02] Cleaned up temporary frame directory: temp_videos/5d08d2de-911a-498a-9b3a-f89597376f02 +2025-08-21 00:17:11 - INFO - [72154fe0-fabb-4695-a74c-07b13b4d70ee] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_047.mp4' +2025-08-21 00:17:11 - INFO - [72154fe0-fabb-4695-a74c-07b13b4d70ee] Video saved to temporary file: temp_videos/72154fe0-fabb-4695-a74c-07b13b4d70ee.mp4 +2025-08-21 00:17:11 - INFO - [72154fe0-fabb-4695-a74c-07b13b4d70ee] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:17:16 - INFO - [72154fe0-fabb-4695-a74c-07b13b4d70ee] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:17:16 - INFO - [72154fe0-fabb-4695-a74c-07b13b4d70ee] 30 frames saved to temp_videos/72154fe0-fabb-4695-a74c-07b13b4d70ee +2025-08-21 00:17:29 - INFO - vision_config is None, using default vision config +2025-08-21 00:17:52 - INFO - Tokens per second: 9.744964420585076, Peak GPU memory MB: 11824.375 +2025-08-21 00:17:52 - INFO - [72154fe0-fabb-4695-a74c-07b13b4d70ee] Inference time: 40.16 seconds, CPU usage: 34.8%, CPU core utilization: [16.9, 39.2, 15.3, 67.8] +2025-08-21 00:17:52 - INFO - [72154fe0-fabb-4695-a74c-07b13b4d70ee] Cleaned up temporary frame directory: temp_videos/72154fe0-fabb-4695-a74c-07b13b4d70ee +2025-08-21 00:17:52 - INFO - [66d33b91-3d66-4c03-8dce-a78ba0c64f20] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_048.mp4' +2025-08-21 00:17:52 - INFO - [66d33b91-3d66-4c03-8dce-a78ba0c64f20] Video saved to temporary file: temp_videos/66d33b91-3d66-4c03-8dce-a78ba0c64f20.mp4 +2025-08-21 00:17:52 - INFO - [66d33b91-3d66-4c03-8dce-a78ba0c64f20] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:17:56 - INFO - [66d33b91-3d66-4c03-8dce-a78ba0c64f20] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:17:56 - INFO - [66d33b91-3d66-4c03-8dce-a78ba0c64f20] 30 frames saved to temp_videos/66d33b91-3d66-4c03-8dce-a78ba0c64f20 +2025-08-21 00:18:09 - INFO - vision_config is None, using default vision config +2025-08-21 00:18:21 - INFO - Tokens per second: 5.534022820709767, Peak GPU memory MB: 11824.375 +2025-08-21 00:18:21 - INFO - [66d33b91-3d66-4c03-8dce-a78ba0c64f20] Inference time: 28.96 seconds, CPU usage: 36.9%, CPU core utilization: [64.7, 22.2, 29.9, 30.5] +2025-08-21 00:18:21 - INFO - [66d33b91-3d66-4c03-8dce-a78ba0c64f20] Cleaned up temporary frame directory: temp_videos/66d33b91-3d66-4c03-8dce-a78ba0c64f20 +2025-08-21 00:18:21 - INFO - [f92833f5-5c53-4d98-bf8a-1686007f24e8] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_049.mp4' +2025-08-21 00:18:21 - INFO - [f92833f5-5c53-4d98-bf8a-1686007f24e8] Video saved to temporary file: temp_videos/f92833f5-5c53-4d98-bf8a-1686007f24e8.mp4 +2025-08-21 00:18:21 - INFO - [f92833f5-5c53-4d98-bf8a-1686007f24e8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:18:25 - INFO - [f92833f5-5c53-4d98-bf8a-1686007f24e8] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:18:25 - INFO - [f92833f5-5c53-4d98-bf8a-1686007f24e8] 30 frames saved to temp_videos/f92833f5-5c53-4d98-bf8a-1686007f24e8 +2025-08-21 00:18:38 - INFO - vision_config is None, using default vision config +2025-08-21 00:18:51 - INFO - Tokens per second: 6.507467905304537, Peak GPU memory MB: 11824.375 +2025-08-21 00:18:51 - INFO - [f92833f5-5c53-4d98-bf8a-1686007f24e8] Inference time: 30.29 seconds, CPU usage: 36.6%, CPU core utilization: [53.7, 28.6, 33.4, 30.8] +2025-08-21 00:18:51 - INFO - [f92833f5-5c53-4d98-bf8a-1686007f24e8] Cleaned up temporary frame directory: temp_videos/f92833f5-5c53-4d98-bf8a-1686007f24e8 +2025-08-21 00:18:51 - INFO - [08e97c3f-0eee-4b5d-a9f5-51984817e6f9] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_050.mp4' +2025-08-21 00:18:51 - INFO - [08e97c3f-0eee-4b5d-a9f5-51984817e6f9] Video saved to temporary file: temp_videos/08e97c3f-0eee-4b5d-a9f5-51984817e6f9.mp4 +2025-08-21 00:18:51 - INFO - [08e97c3f-0eee-4b5d-a9f5-51984817e6f9] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:18:56 - INFO - [08e97c3f-0eee-4b5d-a9f5-51984817e6f9] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:18:56 - INFO - [08e97c3f-0eee-4b5d-a9f5-51984817e6f9] 30 frames saved to temp_videos/08e97c3f-0eee-4b5d-a9f5-51984817e6f9 +2025-08-21 00:19:09 - INFO - vision_config is None, using default vision config +2025-08-21 00:19:18 - INFO - Tokens per second: 3.2541179562626197, Peak GPU memory MB: 11824.375 +2025-08-21 00:19:18 - INFO - [08e97c3f-0eee-4b5d-a9f5-51984817e6f9] Inference time: 26.65 seconds, CPU usage: 37.9%, CPU core utilization: [48.0, 17.9, 20.3, 65.1] +2025-08-21 00:19:18 - INFO - [08e97c3f-0eee-4b5d-a9f5-51984817e6f9] Cleaned up temporary frame directory: temp_videos/08e97c3f-0eee-4b5d-a9f5-51984817e6f9 +2025-08-21 00:19:18 - INFO - [fa21ed16-4a4b-4252-a232-b1be5c853f7d] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_051.mp4' +2025-08-21 00:19:18 - INFO - [fa21ed16-4a4b-4252-a232-b1be5c853f7d] Video saved to temporary file: temp_videos/fa21ed16-4a4b-4252-a232-b1be5c853f7d.mp4 +2025-08-21 00:19:18 - INFO - [fa21ed16-4a4b-4252-a232-b1be5c853f7d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:19:22 - INFO - [fa21ed16-4a4b-4252-a232-b1be5c853f7d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:19:22 - INFO - [fa21ed16-4a4b-4252-a232-b1be5c853f7d] 30 frames saved to temp_videos/fa21ed16-4a4b-4252-a232-b1be5c853f7d +2025-08-21 00:19:35 - INFO - vision_config is None, using default vision config +2025-08-21 00:19:45 - INFO - Tokens per second: 3.912398850108666, Peak GPU memory MB: 11824.375 +2025-08-21 00:19:45 - INFO - [fa21ed16-4a4b-4252-a232-b1be5c853f7d] Inference time: 27.18 seconds, CPU usage: 37.2%, CPU core utilization: [62.7, 17.9, 17.4, 50.5] +2025-08-21 00:19:45 - INFO - [fa21ed16-4a4b-4252-a232-b1be5c853f7d] Cleaned up temporary frame directory: temp_videos/fa21ed16-4a4b-4252-a232-b1be5c853f7d +2025-08-21 00:19:45 - INFO - [5e7d1173-00c2-46a2-81a0-1dc1223bd7c2] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_052.mp4' +2025-08-21 00:19:45 - INFO - [5e7d1173-00c2-46a2-81a0-1dc1223bd7c2] Video saved to temporary file: temp_videos/5e7d1173-00c2-46a2-81a0-1dc1223bd7c2.mp4 +2025-08-21 00:19:45 - INFO - [5e7d1173-00c2-46a2-81a0-1dc1223bd7c2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:19:50 - INFO - [5e7d1173-00c2-46a2-81a0-1dc1223bd7c2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:19:50 - INFO - [5e7d1173-00c2-46a2-81a0-1dc1223bd7c2] 30 frames saved to temp_videos/5e7d1173-00c2-46a2-81a0-1dc1223bd7c2 +2025-08-21 00:20:02 - INFO - vision_config is None, using default vision config +2025-08-21 00:20:16 - INFO - Tokens per second: 7.248168780638238, Peak GPU memory MB: 11824.375 +2025-08-21 00:20:16 - INFO - [5e7d1173-00c2-46a2-81a0-1dc1223bd7c2] Inference time: 31.69 seconds, CPU usage: 36.1%, CPU core utilization: [60.1, 41.3, 16.2, 26.8] +2025-08-21 00:20:16 - INFO - [5e7d1173-00c2-46a2-81a0-1dc1223bd7c2] Cleaned up temporary frame directory: temp_videos/5e7d1173-00c2-46a2-81a0-1dc1223bd7c2 +2025-08-21 00:20:16 - INFO - [1456aa86-ac67-48b3-8739-8f5901221155] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_053.mp4' +2025-08-21 00:20:16 - INFO - [1456aa86-ac67-48b3-8739-8f5901221155] Video saved to temporary file: temp_videos/1456aa86-ac67-48b3-8739-8f5901221155.mp4 +2025-08-21 00:20:16 - INFO - [1456aa86-ac67-48b3-8739-8f5901221155] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:20:21 - INFO - [1456aa86-ac67-48b3-8739-8f5901221155] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:20:21 - INFO - [1456aa86-ac67-48b3-8739-8f5901221155] 30 frames saved to temp_videos/1456aa86-ac67-48b3-8739-8f5901221155 +2025-08-21 00:20:34 - INFO - vision_config is None, using default vision config +2025-08-21 00:20:47 - INFO - Tokens per second: 6.420566489349132, Peak GPU memory MB: 11824.375 +2025-08-21 00:20:47 - INFO - [1456aa86-ac67-48b3-8739-8f5901221155] Inference time: 30.22 seconds, CPU usage: 36.5%, CPU core utilization: [46.5, 21.6, 59.6, 18.1] +2025-08-21 00:20:47 - INFO - [1456aa86-ac67-48b3-8739-8f5901221155] Cleaned up temporary frame directory: temp_videos/1456aa86-ac67-48b3-8739-8f5901221155 +2025-08-21 00:20:47 - INFO - [b6dffc6c-f438-4737-809c-69a392f5e9c0] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_054.mp4' +2025-08-21 00:20:47 - INFO - [b6dffc6c-f438-4737-809c-69a392f5e9c0] Video saved to temporary file: temp_videos/b6dffc6c-f438-4737-809c-69a392f5e9c0.mp4 +2025-08-21 00:20:47 - INFO - [b6dffc6c-f438-4737-809c-69a392f5e9c0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:20:51 - INFO - [b6dffc6c-f438-4737-809c-69a392f5e9c0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:20:51 - INFO - [b6dffc6c-f438-4737-809c-69a392f5e9c0] 30 frames saved to temp_videos/b6dffc6c-f438-4737-809c-69a392f5e9c0 +2025-08-21 00:21:04 - INFO - vision_config is None, using default vision config +2025-08-21 00:21:15 - INFO - Tokens per second: 5.19902103533599, Peak GPU memory MB: 11824.375 +2025-08-21 00:21:15 - INFO - [b6dffc6c-f438-4737-809c-69a392f5e9c0] Inference time: 28.49 seconds, CPU usage: 37.6%, CPU core utilization: [49.0, 34.0, 48.4, 19.0] +2025-08-21 00:21:15 - INFO - [b6dffc6c-f438-4737-809c-69a392f5e9c0] Cleaned up temporary frame directory: temp_videos/b6dffc6c-f438-4737-809c-69a392f5e9c0 +2025-08-21 00:21:15 - INFO - [41c34e88-b17f-4653-a2d5-652b09643d82] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_055.mp4' +2025-08-21 00:21:15 - INFO - [41c34e88-b17f-4653-a2d5-652b09643d82] Video saved to temporary file: temp_videos/41c34e88-b17f-4653-a2d5-652b09643d82.mp4 +2025-08-21 00:21:15 - INFO - [41c34e88-b17f-4653-a2d5-652b09643d82] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:21:20 - INFO - [41c34e88-b17f-4653-a2d5-652b09643d82] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:21:20 - INFO - [41c34e88-b17f-4653-a2d5-652b09643d82] 30 frames saved to temp_videos/41c34e88-b17f-4653-a2d5-652b09643d82 +2025-08-21 00:21:33 - INFO - vision_config is None, using default vision config +2025-08-21 00:21:46 - INFO - Tokens per second: 6.72473013041542, Peak GPU memory MB: 11824.375 +2025-08-21 00:21:46 - INFO - [41c34e88-b17f-4653-a2d5-652b09643d82] Inference time: 30.70 seconds, CPU usage: 36.5%, CPU core utilization: [35.2, 45.0, 37.4, 28.4] +2025-08-21 00:21:46 - INFO - [41c34e88-b17f-4653-a2d5-652b09643d82] Cleaned up temporary frame directory: temp_videos/41c34e88-b17f-4653-a2d5-652b09643d82 +2025-08-21 00:21:46 - INFO - [1d8dced7-a51d-4146-b969-9cf087ebf060] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_056.mp4' +2025-08-21 00:21:46 - INFO - [1d8dced7-a51d-4146-b969-9cf087ebf060] Video saved to temporary file: temp_videos/1d8dced7-a51d-4146-b969-9cf087ebf060.mp4 +2025-08-21 00:21:46 - INFO - [1d8dced7-a51d-4146-b969-9cf087ebf060] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:21:51 - INFO - [1d8dced7-a51d-4146-b969-9cf087ebf060] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:21:51 - INFO - [1d8dced7-a51d-4146-b969-9cf087ebf060] 30 frames saved to temp_videos/1d8dced7-a51d-4146-b969-9cf087ebf060 +2025-08-21 00:22:04 - INFO - vision_config is None, using default vision config +2025-08-21 00:22:15 - INFO - Tokens per second: 5.483718629411901, Peak GPU memory MB: 11824.375 +2025-08-21 00:22:15 - INFO - [1d8dced7-a51d-4146-b969-9cf087ebf060] Inference time: 28.89 seconds, CPU usage: 37.1%, CPU core utilization: [27.1, 19.3, 43.8, 58.1] +2025-08-21 00:22:15 - INFO - [1d8dced7-a51d-4146-b969-9cf087ebf060] Cleaned up temporary frame directory: temp_videos/1d8dced7-a51d-4146-b969-9cf087ebf060 +2025-08-21 00:22:15 - INFO - [ccc3c8c3-225b-4c96-acfe-9c195f572e4a] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_057.mp4' +2025-08-21 00:22:15 - INFO - [ccc3c8c3-225b-4c96-acfe-9c195f572e4a] Video saved to temporary file: temp_videos/ccc3c8c3-225b-4c96-acfe-9c195f572e4a.mp4 +2025-08-21 00:22:15 - INFO - [ccc3c8c3-225b-4c96-acfe-9c195f572e4a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:22:20 - INFO - [ccc3c8c3-225b-4c96-acfe-9c195f572e4a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:22:20 - INFO - [ccc3c8c3-225b-4c96-acfe-9c195f572e4a] 30 frames saved to temp_videos/ccc3c8c3-225b-4c96-acfe-9c195f572e4a +2025-08-21 00:22:33 - INFO - vision_config is None, using default vision config +2025-08-21 00:22:49 - INFO - Tokens per second: 8.073006063593622, Peak GPU memory MB: 11824.375 +2025-08-21 00:22:49 - INFO - [ccc3c8c3-225b-4c96-acfe-9c195f572e4a] Inference time: 33.72 seconds, CPU usage: 35.9%, CPU core utilization: [27.8, 59.7, 19.4, 37.0] +2025-08-21 00:22:49 - INFO - [ccc3c8c3-225b-4c96-acfe-9c195f572e4a] Cleaned up temporary frame directory: temp_videos/ccc3c8c3-225b-4c96-acfe-9c195f572e4a diff --git a/API_Transformers/logs/MiniCPM-V-4/20250821_002349.log b/API_Transformers/logs/MiniCPM-V-4/20250821_002349.log new file mode 100644 index 0000000000000000000000000000000000000000..6bcefa04c4ef5c14c90d7f55e01383192a411866 --- /dev/null +++ b/API_Transformers/logs/MiniCPM-V-4/20250821_002349.log @@ -0,0 +1,67 @@ +2025-08-21 00:23:49 - INFO - Loading model: openbmb/MiniCPM-V-4 +2025-08-21 00:23:50 - INFO - vision_config is None, using default vision config +2025-08-21 00:24:41 - INFO - Model loaded in 52.24 seconds +2025-08-21 00:24:41 - INFO - GPU Memory Usage after model load: 7802.99 MB +2025-08-21 00:24:48 - INFO - [675b6c5c-5524-4cc9-a700-76d2d090a7a4] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. 
Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_001.mp4' +2025-08-21 00:24:48 - INFO - [675b6c5c-5524-4cc9-a700-76d2d090a7a4] Video saved to temporary file: temp_videos/675b6c5c-5524-4cc9-a700-76d2d090a7a4.mp4 +2025-08-21 00:24:48 - INFO - [675b6c5c-5524-4cc9-a700-76d2d090a7a4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:24:52 - INFO - [675b6c5c-5524-4cc9-a700-76d2d090a7a4] Extracted 30 frames successfully. Saving to temporary files... +2025-08-21 00:24:53 - INFO - [675b6c5c-5524-4cc9-a700-76d2d090a7a4] 30 frames saved to temp_videos/675b6c5c-5524-4cc9-a700-76d2d090a7a4 +2025-08-21 00:25:09 - INFO - vision_config is None, using default vision config +2025-08-21 00:25:21 - INFO - Tokens per second: 5.985183148455991, Peak GPU memory MB: 11824.375 +2025-08-21 00:25:21 - INFO - [675b6c5c-5524-4cc9-a700-76d2d090a7a4] Inference time: 33.25 seconds, CPU usage: 37.3%, CPU core utilization: [37.0, 39.5, 36.4, 36.2] +2025-08-21 00:25:21 - INFO - [675b6c5c-5524-4cc9-a700-76d2d090a7a4] Cleaned up temporary frame directory: temp_videos/675b6c5c-5524-4cc9-a700-76d2d090a7a4 +2025-08-21 00:25:21 - INFO - [3315fc05-4535-4c30-910d-0b8c1a9c8855] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_002.mp4' +2025-08-21 00:25:21 - INFO - [3315fc05-4535-4c30-910d-0b8c1a9c8855] Video saved to temporary file: temp_videos/3315fc05-4535-4c30-910d-0b8c1a9c8855.mp4 +2025-08-21 00:25:21 - INFO - [3315fc05-4535-4c30-910d-0b8c1a9c8855] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:25:29 - INFO - [3315fc05-4535-4c30-910d-0b8c1a9c8855] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:25:29 - INFO - [3315fc05-4535-4c30-910d-0b8c1a9c8855] 30 frames saved to temp_videos/3315fc05-4535-4c30-910d-0b8c1a9c8855 +2025-08-21 00:25:41 - INFO - vision_config is None, using default vision config +2025-08-21 00:25:50 - INFO - Tokens per second: 3.7285647057248625, Peak GPU memory MB: 11824.375 +2025-08-21 00:25:50 - INFO - [3315fc05-4535-4c30-910d-0b8c1a9c8855] Inference time: 29.68 seconds, CPU usage: 50.6%, CPU core utilization: [56.4, 62.1, 35.5, 48.3] +2025-08-21 00:25:51 - INFO - [3315fc05-4535-4c30-910d-0b8c1a9c8855] Cleaned up temporary frame directory: temp_videos/3315fc05-4535-4c30-910d-0b8c1a9c8855 +2025-08-21 00:25:51 - INFO - [89135b17-c5fd-406e-8ce7-875b26d87444] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_003.mp4' +2025-08-21 00:25:51 - INFO - [89135b17-c5fd-406e-8ce7-875b26d87444] Video saved to temporary file: temp_videos/89135b17-c5fd-406e-8ce7-875b26d87444.mp4 +2025-08-21 00:25:51 - INFO - [89135b17-c5fd-406e-8ce7-875b26d87444] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:25:56 - INFO - [89135b17-c5fd-406e-8ce7-875b26d87444] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:25:56 - INFO - [89135b17-c5fd-406e-8ce7-875b26d87444] 30 frames saved to temp_videos/89135b17-c5fd-406e-8ce7-875b26d87444 +2025-08-21 00:26:08 - INFO - vision_config is None, using default vision config +2025-08-21 00:26:22 - INFO - Tokens per second: 6.963268555022767, Peak GPU memory MB: 11824.375 +2025-08-21 00:26:22 - INFO - [89135b17-c5fd-406e-8ce7-875b26d87444] Inference time: 31.19 seconds, CPU usage: 38.1%, CPU core utilization: [30.8, 32.9, 49.7, 38.7] +2025-08-21 00:26:22 - INFO - [89135b17-c5fd-406e-8ce7-875b26d87444] Cleaned up temporary frame directory: temp_videos/89135b17-c5fd-406e-8ce7-875b26d87444 +2025-08-21 00:26:22 - INFO - [c196a0cc-7f44-4d02-8ddb-af01ad8e7a9a] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_004.mp4' +2025-08-21 00:26:22 - INFO - [c196a0cc-7f44-4d02-8ddb-af01ad8e7a9a] Video saved to temporary file: temp_videos/c196a0cc-7f44-4d02-8ddb-af01ad8e7a9a.mp4 +2025-08-21 00:26:22 - INFO - [c196a0cc-7f44-4d02-8ddb-af01ad8e7a9a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:26:27 - INFO - [c196a0cc-7f44-4d02-8ddb-af01ad8e7a9a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:26:27 - INFO - [c196a0cc-7f44-4d02-8ddb-af01ad8e7a9a] 30 frames saved to temp_videos/c196a0cc-7f44-4d02-8ddb-af01ad8e7a9a +2025-08-21 00:26:39 - INFO - vision_config is None, using default vision config +2025-08-21 00:26:53 - INFO - Tokens per second: 7.242406371874894, Peak GPU memory MB: 11824.375 +2025-08-21 00:26:53 - INFO - [c196a0cc-7f44-4d02-8ddb-af01ad8e7a9a] Inference time: 31.55 seconds, CPU usage: 36.5%, CPU core utilization: [47.2, 19.7, 61.2, 17.9] +2025-08-21 00:26:53 - INFO - [c196a0cc-7f44-4d02-8ddb-af01ad8e7a9a] Cleaned up temporary frame directory: temp_videos/c196a0cc-7f44-4d02-8ddb-af01ad8e7a9a +2025-08-21 00:26:53 - INFO - [3c7789f8-90dc-45d1-be32-cdc10502bbe2] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_005.mp4' +2025-08-21 00:26:53 - INFO - [3c7789f8-90dc-45d1-be32-cdc10502bbe2] Video saved to temporary file: temp_videos/3c7789f8-90dc-45d1-be32-cdc10502bbe2.mp4 +2025-08-21 00:26:53 - INFO - [3c7789f8-90dc-45d1-be32-cdc10502bbe2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:26:58 - INFO - [3c7789f8-90dc-45d1-be32-cdc10502bbe2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:26:58 - INFO - [3c7789f8-90dc-45d1-be32-cdc10502bbe2] 30 frames saved to temp_videos/3c7789f8-90dc-45d1-be32-cdc10502bbe2 +2025-08-21 00:27:11 - INFO - vision_config is None, using default vision config +2025-08-21 00:27:22 - INFO - Tokens per second: 5.385284609810389, Peak GPU memory MB: 11824.375 +2025-08-21 00:27:22 - INFO - [3c7789f8-90dc-45d1-be32-cdc10502bbe2] Inference time: 28.65 seconds, CPU usage: 37.3%, CPU core utilization: [20.6, 21.4, 90.0, 16.9] +2025-08-21 00:27:22 - INFO - [3c7789f8-90dc-45d1-be32-cdc10502bbe2] Cleaned up temporary frame directory: temp_videos/3c7789f8-90dc-45d1-be32-cdc10502bbe2 +2025-08-21 00:27:22 - INFO - [66ffedf3-6d71-4829-adf4-7859b5b21979] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_006.mp4' +2025-08-21 00:27:22 - INFO - [66ffedf3-6d71-4829-adf4-7859b5b21979] Video saved to temporary file: temp_videos/66ffedf3-6d71-4829-adf4-7859b5b21979.mp4 +2025-08-21 00:27:22 - INFO - [66ffedf3-6d71-4829-adf4-7859b5b21979] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:27:27 - INFO - [66ffedf3-6d71-4829-adf4-7859b5b21979] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:27:27 - INFO - [66ffedf3-6d71-4829-adf4-7859b5b21979] 30 frames saved to temp_videos/66ffedf3-6d71-4829-adf4-7859b5b21979 +2025-08-21 00:27:40 - INFO - vision_config is None, using default vision config +2025-08-21 00:27:50 - INFO - Tokens per second: 4.504682210102835, Peak GPU memory MB: 11824.375 +2025-08-21 00:27:50 - INFO - [66ffedf3-6d71-4829-adf4-7859b5b21979] Inference time: 27.70 seconds, CPU usage: 37.4%, CPU core utilization: [27.3, 19.3, 52.1, 50.8] +2025-08-21 00:27:50 - INFO - [66ffedf3-6d71-4829-adf4-7859b5b21979] Cleaned up temporary frame directory: temp_videos/66ffedf3-6d71-4829-adf4-7859b5b21979 +2025-08-21 00:27:50 - INFO - [2167f629-b4f8-4e08-9179-f8eec50d35ab] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_007.mp4' +2025-08-21 00:27:50 - INFO - [2167f629-b4f8-4e08-9179-f8eec50d35ab] Video saved to temporary file: temp_videos/2167f629-b4f8-4e08-9179-f8eec50d35ab.mp4 +2025-08-21 00:27:50 - INFO - [2167f629-b4f8-4e08-9179-f8eec50d35ab] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:27:54 - INFO - [2167f629-b4f8-4e08-9179-f8eec50d35ab] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:27:54 - INFO - [2167f629-b4f8-4e08-9179-f8eec50d35ab] 30 frames saved to temp_videos/2167f629-b4f8-4e08-9179-f8eec50d35ab +2025-08-21 00:28:07 - INFO - vision_config is None, using default vision config +2025-08-21 00:28:27 - INFO - Tokens per second: 9.168312990435263, Peak GPU memory MB: 11824.375 +2025-08-21 00:28:27 - INFO - [2167f629-b4f8-4e08-9179-f8eec50d35ab] Inference time: 37.23 seconds, CPU usage: 35.9%, CPU core utilization: [45.1, 19.0, 27.6, 52.0] +2025-08-21 00:28:27 - INFO - [2167f629-b4f8-4e08-9179-f8eec50d35ab] Cleaned up temporary frame directory: temp_videos/2167f629-b4f8-4e08-9179-f8eec50d35ab diff --git a/API_Transformers/logs/MiniCPM-V-4/20250821_005748.log b/API_Transformers/logs/MiniCPM-V-4/20250821_005748.log new file mode 100644 index 0000000000000000000000000000000000000000..c21452b8a6c0f36f5df7d71b2793599c8420860f --- /dev/null +++ b/API_Transformers/logs/MiniCPM-V-4/20250821_005748.log @@ -0,0 +1,472 @@ +2025-08-21 00:57:48 - INFO - Loading model: openbmb/MiniCPM-V-4 +2025-08-21 00:57:49 - INFO - vision_config is None, using default vision config +2025-08-21 00:58:53 - INFO - Model loaded in 64.86 seconds +2025-08-21 00:58:53 - INFO - GPU Memory Usage after model load: 7802.99 MB +2025-08-21 01:00:40 - INFO - [f1b33371-19eb-4445-b227-d46a97ed0050] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. 
Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_001.mp4' +2025-08-21 01:00:40 - INFO - [f1b33371-19eb-4445-b227-d46a97ed0050] Video saved to temporary file: temp_videos/f1b33371-19eb-4445-b227-d46a97ed0050.mp4 +2025-08-21 01:00:40 - INFO - [f1b33371-19eb-4445-b227-d46a97ed0050] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:00:45 - INFO - [f1b33371-19eb-4445-b227-d46a97ed0050] Extracted 30 frames successfully. Saving to temporary files... +2025-08-21 01:00:45 - INFO - [f1b33371-19eb-4445-b227-d46a97ed0050] 30 frames saved to temp_videos/f1b33371-19eb-4445-b227-d46a97ed0050 +2025-08-21 01:01:03 - INFO - vision_config is None, using default vision config +2025-08-21 01:01:14 - INFO - Tokens per second: 5.106639419779758, Peak GPU memory MB: 11824.375 +2025-08-21 01:01:14 - INFO - [f1b33371-19eb-4445-b227-d46a97ed0050] Inference time: 33.66 seconds, CPU usage: 16.3%, CPU core utilization: [16.0, 13.4, 20.9, 14.9] +2025-08-21 01:01:14 - INFO - [f1b33371-19eb-4445-b227-d46a97ed0050] Cleaned up temporary frame directory: temp_videos/f1b33371-19eb-4445-b227-d46a97ed0050 +2025-08-21 01:01:14 - INFO - [a95d5cef-7c7e-42ee-8944-0bfe41e9beed] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_002.mp4' +2025-08-21 01:01:14 - INFO - [a95d5cef-7c7e-42ee-8944-0bfe41e9beed] Video saved to temporary file: temp_videos/a95d5cef-7c7e-42ee-8944-0bfe41e9beed.mp4 +2025-08-21 01:01:14 - INFO - [a95d5cef-7c7e-42ee-8944-0bfe41e9beed] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:01:19 - INFO - [a95d5cef-7c7e-42ee-8944-0bfe41e9beed] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:01:19 - INFO - [a95d5cef-7c7e-42ee-8944-0bfe41e9beed] 30 frames saved to temp_videos/a95d5cef-7c7e-42ee-8944-0bfe41e9beed +2025-08-21 01:01:32 - INFO - vision_config is None, using default vision config +2025-08-21 01:01:43 - INFO - Tokens per second: 5.976602351659928, Peak GPU memory MB: 11824.375 +2025-08-21 01:01:43 - INFO - [a95d5cef-7c7e-42ee-8944-0bfe41e9beed] Inference time: 29.21 seconds, CPU usage: 36.9%, CPU core utilization: [52.9, 47.3, 16.7, 30.9] +2025-08-21 01:01:43 - INFO - [a95d5cef-7c7e-42ee-8944-0bfe41e9beed] Cleaned up temporary frame directory: temp_videos/a95d5cef-7c7e-42ee-8944-0bfe41e9beed +2025-08-21 01:01:43 - INFO - [3074ddc0-8709-448c-b863-d209d175a408] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_003.mp4' +2025-08-21 01:01:43 - INFO - [3074ddc0-8709-448c-b863-d209d175a408] Video saved to temporary file: temp_videos/3074ddc0-8709-448c-b863-d209d175a408.mp4 +2025-08-21 01:01:43 - INFO - [3074ddc0-8709-448c-b863-d209d175a408] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:01:48 - INFO - [3074ddc0-8709-448c-b863-d209d175a408] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:01:48 - INFO - [3074ddc0-8709-448c-b863-d209d175a408] 30 frames saved to temp_videos/3074ddc0-8709-448c-b863-d209d175a408 +2025-08-21 01:02:01 - INFO - vision_config is None, using default vision config +2025-08-21 01:02:11 - INFO - Tokens per second: 5.11842779774044, Peak GPU memory MB: 11824.375 +2025-08-21 01:02:11 - INFO - [3074ddc0-8709-448c-b863-d209d175a408] Inference time: 28.20 seconds, CPU usage: 37.6%, CPU core utilization: [51.0, 23.2, 57.8, 18.2] +2025-08-21 01:02:11 - INFO - [3074ddc0-8709-448c-b863-d209d175a408] Cleaned up temporary frame directory: temp_videos/3074ddc0-8709-448c-b863-d209d175a408 +2025-08-21 01:02:11 - INFO - [07ccf239-d23c-4776-8404-e885a43e8515] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_004.mp4' +2025-08-21 01:02:11 - INFO - [07ccf239-d23c-4776-8404-e885a43e8515] Video saved to temporary file: temp_videos/07ccf239-d23c-4776-8404-e885a43e8515.mp4 +2025-08-21 01:02:11 - INFO - [07ccf239-d23c-4776-8404-e885a43e8515] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:02:16 - INFO - [07ccf239-d23c-4776-8404-e885a43e8515] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:02:16 - INFO - [07ccf239-d23c-4776-8404-e885a43e8515] 30 frames saved to temp_videos/07ccf239-d23c-4776-8404-e885a43e8515 +2025-08-21 01:02:29 - INFO - vision_config is None, using default vision config +2025-08-21 01:02:43 - INFO - Tokens per second: 7.094428465980155, Peak GPU memory MB: 11824.375 +2025-08-21 01:02:43 - INFO - [07ccf239-d23c-4776-8404-e885a43e8515] Inference time: 31.26 seconds, CPU usage: 36.1%, CPU core utilization: [16.6, 46.3, 15.5, 66.2] +2025-08-21 01:02:43 - INFO - [07ccf239-d23c-4776-8404-e885a43e8515] Cleaned up temporary frame directory: temp_videos/07ccf239-d23c-4776-8404-e885a43e8515 +2025-08-21 01:02:43 - INFO - [2bbdb8ec-7655-434d-834f-cb58caa1d778] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_005.mp4' +2025-08-21 01:02:43 - INFO - [2bbdb8ec-7655-434d-834f-cb58caa1d778] Video saved to temporary file: temp_videos/2bbdb8ec-7655-434d-834f-cb58caa1d778.mp4 +2025-08-21 01:02:43 - INFO - [2bbdb8ec-7655-434d-834f-cb58caa1d778] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:02:48 - INFO - [2bbdb8ec-7655-434d-834f-cb58caa1d778] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:02:48 - INFO - [2bbdb8ec-7655-434d-834f-cb58caa1d778] 30 frames saved to temp_videos/2bbdb8ec-7655-434d-834f-cb58caa1d778 +2025-08-21 01:03:00 - INFO - vision_config is None, using default vision config +2025-08-21 01:03:11 - INFO - Tokens per second: 5.149733378245225, Peak GPU memory MB: 11824.375 +2025-08-21 01:03:11 - INFO - [2bbdb8ec-7655-434d-834f-cb58caa1d778] Inference time: 28.50 seconds, CPU usage: 37.4%, CPU core utilization: [19.1, 93.3, 16.5, 20.5] +2025-08-21 01:03:11 - INFO - [2bbdb8ec-7655-434d-834f-cb58caa1d778] Cleaned up temporary frame directory: temp_videos/2bbdb8ec-7655-434d-834f-cb58caa1d778 +2025-08-21 01:03:11 - INFO - [2e2162d1-d7a0-4b02-940b-60b27a47d77e] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_006.mp4' +2025-08-21 01:03:11 - INFO - [2e2162d1-d7a0-4b02-940b-60b27a47d77e] Video saved to temporary file: temp_videos/2e2162d1-d7a0-4b02-940b-60b27a47d77e.mp4 +2025-08-21 01:03:11 - INFO - [2e2162d1-d7a0-4b02-940b-60b27a47d77e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:03:16 - INFO - [2e2162d1-d7a0-4b02-940b-60b27a47d77e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:03:16 - INFO - [2e2162d1-d7a0-4b02-940b-60b27a47d77e] 30 frames saved to temp_videos/2e2162d1-d7a0-4b02-940b-60b27a47d77e +2025-08-21 01:03:29 - INFO - vision_config is None, using default vision config +2025-08-21 01:03:37 - INFO - Tokens per second: 1.8942455900034438, Peak GPU memory MB: 11824.375 +2025-08-21 01:03:37 - INFO - [2e2162d1-d7a0-4b02-940b-60b27a47d77e] Inference time: 25.68 seconds, CPU usage: 37.9%, CPU core utilization: [27.3, 31.8, 57.5, 35.1] +2025-08-21 01:03:37 - INFO - [2e2162d1-d7a0-4b02-940b-60b27a47d77e] Cleaned up temporary frame directory: temp_videos/2e2162d1-d7a0-4b02-940b-60b27a47d77e +2025-08-21 01:03:37 - INFO - [f08503e2-3694-4dd9-b73f-a2cebaac51af] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_007.mp4' +2025-08-21 01:03:37 - INFO - [f08503e2-3694-4dd9-b73f-a2cebaac51af] Video saved to temporary file: temp_videos/f08503e2-3694-4dd9-b73f-a2cebaac51af.mp4 +2025-08-21 01:03:37 - INFO - [f08503e2-3694-4dd9-b73f-a2cebaac51af] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:03:42 - INFO - [f08503e2-3694-4dd9-b73f-a2cebaac51af] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:03:42 - INFO - [f08503e2-3694-4dd9-b73f-a2cebaac51af] 30 frames saved to temp_videos/f08503e2-3694-4dd9-b73f-a2cebaac51af +2025-08-21 01:03:55 - INFO - vision_config is None, using default vision config +2025-08-21 01:04:03 - INFO - Tokens per second: 2.3168022945047397, Peak GPU memory MB: 11824.375 +2025-08-21 01:04:03 - INFO - [f08503e2-3694-4dd9-b73f-a2cebaac51af] Inference time: 25.99 seconds, CPU usage: 37.8%, CPU core utilization: [22.5, 61.1, 17.4, 50.2] +2025-08-21 01:04:03 - INFO - [f08503e2-3694-4dd9-b73f-a2cebaac51af] Cleaned up temporary frame directory: temp_videos/f08503e2-3694-4dd9-b73f-a2cebaac51af +2025-08-21 01:04:03 - INFO - [63cd6cc8-794f-4d25-9168-10dbba530d1d] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_008.mp4' +2025-08-21 01:04:03 - INFO - [63cd6cc8-794f-4d25-9168-10dbba530d1d] Video saved to temporary file: temp_videos/63cd6cc8-794f-4d25-9168-10dbba530d1d.mp4 +2025-08-21 01:04:03 - INFO - [63cd6cc8-794f-4d25-9168-10dbba530d1d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:04:08 - INFO - [63cd6cc8-794f-4d25-9168-10dbba530d1d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:04:08 - INFO - [63cd6cc8-794f-4d25-9168-10dbba530d1d] 30 frames saved to temp_videos/63cd6cc8-794f-4d25-9168-10dbba530d1d +2025-08-21 01:04:21 - INFO - vision_config is None, using default vision config +2025-08-21 01:04:30 - INFO - Tokens per second: 4.072920119323832, Peak GPU memory MB: 11824.375 +2025-08-21 01:04:30 - INFO - [63cd6cc8-794f-4d25-9168-10dbba530d1d] Inference time: 27.38 seconds, CPU usage: 37.7%, CPU core utilization: [26.2, 41.3, 45.1, 38.4] +2025-08-21 01:04:30 - INFO - [63cd6cc8-794f-4d25-9168-10dbba530d1d] Cleaned up temporary frame directory: temp_videos/63cd6cc8-794f-4d25-9168-10dbba530d1d +2025-08-21 01:04:30 - INFO - [9c39529e-632b-4ee3-8076-2ecd9890f71e] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_009.mp4' +2025-08-21 01:04:30 - INFO - [9c39529e-632b-4ee3-8076-2ecd9890f71e] Video saved to temporary file: temp_videos/9c39529e-632b-4ee3-8076-2ecd9890f71e.mp4 +2025-08-21 01:04:30 - INFO - [9c39529e-632b-4ee3-8076-2ecd9890f71e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:04:35 - INFO - [9c39529e-632b-4ee3-8076-2ecd9890f71e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:04:35 - INFO - [9c39529e-632b-4ee3-8076-2ecd9890f71e] 30 frames saved to temp_videos/9c39529e-632b-4ee3-8076-2ecd9890f71e +2025-08-21 01:04:48 - INFO - vision_config is None, using default vision config +2025-08-21 01:04:56 - INFO - Tokens per second: 2.3167189804012382, Peak GPU memory MB: 11824.375 +2025-08-21 01:04:56 - INFO - [9c39529e-632b-4ee3-8076-2ecd9890f71e] Inference time: 26.01 seconds, CPU usage: 38.1%, CPU core utilization: [21.9, 49.8, 55.8, 25.0] +2025-08-21 01:04:56 - INFO - [9c39529e-632b-4ee3-8076-2ecd9890f71e] Cleaned up temporary frame directory: temp_videos/9c39529e-632b-4ee3-8076-2ecd9890f71e +2025-08-21 01:04:56 - INFO - [81880b7c-ee68-4a30-849f-1d4ec94e298d] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_010.mp4' +2025-08-21 01:04:56 - INFO - [81880b7c-ee68-4a30-849f-1d4ec94e298d] Video saved to temporary file: temp_videos/81880b7c-ee68-4a30-849f-1d4ec94e298d.mp4 +2025-08-21 01:04:56 - INFO - [81880b7c-ee68-4a30-849f-1d4ec94e298d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:05:01 - INFO - [81880b7c-ee68-4a30-849f-1d4ec94e298d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:05:01 - INFO - [81880b7c-ee68-4a30-849f-1d4ec94e298d] 30 frames saved to temp_videos/81880b7c-ee68-4a30-849f-1d4ec94e298d +2025-08-21 01:05:14 - INFO - vision_config is None, using default vision config +2025-08-21 01:05:24 - INFO - Tokens per second: 3.995493400475914, Peak GPU memory MB: 11824.375 +2025-08-21 01:05:24 - INFO - [81880b7c-ee68-4a30-849f-1d4ec94e298d] Inference time: 27.32 seconds, CPU usage: 37.8%, CPU core utilization: [24.0, 30.3, 46.3, 50.6] +2025-08-21 01:05:24 - INFO - [81880b7c-ee68-4a30-849f-1d4ec94e298d] Cleaned up temporary frame directory: temp_videos/81880b7c-ee68-4a30-849f-1d4ec94e298d +2025-08-21 01:05:24 - INFO - [17dbdf76-a781-446b-92e4-20f0c09ea7fb] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_011.mp4' +2025-08-21 01:05:24 - INFO - [17dbdf76-a781-446b-92e4-20f0c09ea7fb] Video saved to temporary file: temp_videos/17dbdf76-a781-446b-92e4-20f0c09ea7fb.mp4 +2025-08-21 01:05:24 - INFO - [17dbdf76-a781-446b-92e4-20f0c09ea7fb] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:05:28 - INFO - [17dbdf76-a781-446b-92e4-20f0c09ea7fb] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:05:28 - INFO - [17dbdf76-a781-446b-92e4-20f0c09ea7fb] 30 frames saved to temp_videos/17dbdf76-a781-446b-92e4-20f0c09ea7fb +2025-08-21 01:05:41 - INFO - vision_config is None, using default vision config +2025-08-21 01:05:53 - INFO - Tokens per second: 6.202907021110835, Peak GPU memory MB: 11824.375 +2025-08-21 01:05:53 - INFO - [17dbdf76-a781-446b-92e4-20f0c09ea7fb] Inference time: 29.86 seconds, CPU usage: 36.8%, CPU core utilization: [55.8, 29.8, 44.5, 17.1] +2025-08-21 01:05:53 - INFO - [17dbdf76-a781-446b-92e4-20f0c09ea7fb] Cleaned up temporary frame directory: temp_videos/17dbdf76-a781-446b-92e4-20f0c09ea7fb +2025-08-21 01:05:53 - INFO - [cb51ff1b-2681-489a-944e-810fb0878d91] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_012.mp4' +2025-08-21 01:05:53 - INFO - [cb51ff1b-2681-489a-944e-810fb0878d91] Video saved to temporary file: temp_videos/cb51ff1b-2681-489a-944e-810fb0878d91.mp4 +2025-08-21 01:05:53 - INFO - [cb51ff1b-2681-489a-944e-810fb0878d91] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:05:58 - INFO - [cb51ff1b-2681-489a-944e-810fb0878d91] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:05:58 - INFO - [cb51ff1b-2681-489a-944e-810fb0878d91] 30 frames saved to temp_videos/cb51ff1b-2681-489a-944e-810fb0878d91 +2025-08-21 01:06:11 - INFO - vision_config is None, using default vision config +2025-08-21 01:06:24 - INFO - Tokens per second: 6.5597487765640565, Peak GPU memory MB: 11824.375 +2025-08-21 01:06:24 - INFO - [cb51ff1b-2681-489a-944e-810fb0878d91] Inference time: 30.37 seconds, CPU usage: 36.6%, CPU core utilization: [25.1, 23.9, 51.7, 45.5] +2025-08-21 01:06:24 - INFO - [cb51ff1b-2681-489a-944e-810fb0878d91] Cleaned up temporary frame directory: temp_videos/cb51ff1b-2681-489a-944e-810fb0878d91 +2025-08-21 01:06:24 - INFO - [05904247-99c1-419b-974f-352384eb4d6f] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_013.mp4' +2025-08-21 01:06:24 - INFO - [05904247-99c1-419b-974f-352384eb4d6f] Video saved to temporary file: temp_videos/05904247-99c1-419b-974f-352384eb4d6f.mp4 +2025-08-21 01:06:24 - INFO - [05904247-99c1-419b-974f-352384eb4d6f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:06:29 - INFO - [05904247-99c1-419b-974f-352384eb4d6f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:06:29 - INFO - [05904247-99c1-419b-974f-352384eb4d6f] 30 frames saved to temp_videos/05904247-99c1-419b-974f-352384eb4d6f +2025-08-21 01:06:42 - INFO - vision_config is None, using default vision config +2025-08-21 01:06:54 - INFO - Tokens per second: 6.599854347514273, Peak GPU memory MB: 11824.375 +2025-08-21 01:06:54 - INFO - [05904247-99c1-419b-974f-352384eb4d6f] Inference time: 30.52 seconds, CPU usage: 36.9%, CPU core utilization: [68.2, 18.6, 42.7, 18.1] +2025-08-21 01:06:54 - INFO - [05904247-99c1-419b-974f-352384eb4d6f] Cleaned up temporary frame directory: temp_videos/05904247-99c1-419b-974f-352384eb4d6f +2025-08-21 01:06:54 - INFO - [1a8e1c72-a4c4-4ac2-ad36-2bb2e3c2218c] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_014.mp4' +2025-08-21 01:06:54 - INFO - [1a8e1c72-a4c4-4ac2-ad36-2bb2e3c2218c] Video saved to temporary file: temp_videos/1a8e1c72-a4c4-4ac2-ad36-2bb2e3c2218c.mp4 +2025-08-21 01:06:54 - INFO - [1a8e1c72-a4c4-4ac2-ad36-2bb2e3c2218c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:06:59 - INFO - [1a8e1c72-a4c4-4ac2-ad36-2bb2e3c2218c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:06:59 - INFO - [1a8e1c72-a4c4-4ac2-ad36-2bb2e3c2218c] 30 frames saved to temp_videos/1a8e1c72-a4c4-4ac2-ad36-2bb2e3c2218c +2025-08-21 01:07:12 - INFO - vision_config is None, using default vision config +2025-08-21 01:07:24 - INFO - Tokens per second: 6.004141023516783, Peak GPU memory MB: 11824.375 +2025-08-21 01:07:24 - INFO - [1a8e1c72-a4c4-4ac2-ad36-2bb2e3c2218c] Inference time: 29.61 seconds, CPU usage: 36.9%, CPU core utilization: [54.0, 34.3, 33.5, 25.6] +2025-08-21 01:07:24 - INFO - [1a8e1c72-a4c4-4ac2-ad36-2bb2e3c2218c] Cleaned up temporary frame directory: temp_videos/1a8e1c72-a4c4-4ac2-ad36-2bb2e3c2218c +2025-08-21 01:07:24 - INFO - [4180adcc-5e38-40df-9153-9dc175d55b7d] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_015.mp4' +2025-08-21 01:07:24 - INFO - [4180adcc-5e38-40df-9153-9dc175d55b7d] Video saved to temporary file: temp_videos/4180adcc-5e38-40df-9153-9dc175d55b7d.mp4 +2025-08-21 01:07:24 - INFO - [4180adcc-5e38-40df-9153-9dc175d55b7d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:07:29 - INFO - [4180adcc-5e38-40df-9153-9dc175d55b7d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:07:29 - INFO - [4180adcc-5e38-40df-9153-9dc175d55b7d] 30 frames saved to temp_videos/4180adcc-5e38-40df-9153-9dc175d55b7d +2025-08-21 01:07:42 - INFO - vision_config is None, using default vision config +2025-08-21 01:07:57 - INFO - Tokens per second: 7.644809074843396, Peak GPU memory MB: 11824.375 +2025-08-21 01:07:57 - INFO - [4180adcc-5e38-40df-9153-9dc175d55b7d] Inference time: 32.59 seconds, CPU usage: 35.7%, CPU core utilization: [26.1, 37.3, 48.7, 30.7] +2025-08-21 01:07:57 - INFO - [4180adcc-5e38-40df-9153-9dc175d55b7d] Cleaned up temporary frame directory: temp_videos/4180adcc-5e38-40df-9153-9dc175d55b7d +2025-08-21 01:07:57 - INFO - [31151bfe-e639-4354-8d86-1446365b7a6d] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_016.mp4' +2025-08-21 01:07:57 - INFO - [31151bfe-e639-4354-8d86-1446365b7a6d] Video saved to temporary file: temp_videos/31151bfe-e639-4354-8d86-1446365b7a6d.mp4 +2025-08-21 01:07:57 - INFO - [31151bfe-e639-4354-8d86-1446365b7a6d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:08:01 - INFO - [31151bfe-e639-4354-8d86-1446365b7a6d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:08:02 - INFO - [31151bfe-e639-4354-8d86-1446365b7a6d] 30 frames saved to temp_videos/31151bfe-e639-4354-8d86-1446365b7a6d +2025-08-21 01:08:14 - INFO - vision_config is None, using default vision config +2025-08-21 01:08:24 - INFO - Tokens per second: 3.917112300612553, Peak GPU memory MB: 11824.375 +2025-08-21 01:08:24 - INFO - [31151bfe-e639-4354-8d86-1446365b7a6d] Inference time: 27.22 seconds, CPU usage: 37.9%, CPU core utilization: [26.2, 35.0, 54.6, 35.9] +2025-08-21 01:08:24 - INFO - [31151bfe-e639-4354-8d86-1446365b7a6d] Cleaned up temporary frame directory: temp_videos/31151bfe-e639-4354-8d86-1446365b7a6d +2025-08-21 01:08:24 - INFO - [bea72135-e169-4ffa-8a4d-711a63d29de7] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_017.mp4' +2025-08-21 01:08:24 - INFO - [bea72135-e169-4ffa-8a4d-711a63d29de7] Video saved to temporary file: temp_videos/bea72135-e169-4ffa-8a4d-711a63d29de7.mp4 +2025-08-21 01:08:24 - INFO - [bea72135-e169-4ffa-8a4d-711a63d29de7] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:08:29 - INFO - [bea72135-e169-4ffa-8a4d-711a63d29de7] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:08:29 - INFO - [bea72135-e169-4ffa-8a4d-711a63d29de7] 30 frames saved to temp_videos/bea72135-e169-4ffa-8a4d-711a63d29de7 +2025-08-21 01:08:42 - INFO - vision_config is None, using default vision config +2025-08-21 01:08:52 - INFO - Tokens per second: 5.140461789786328, Peak GPU memory MB: 11824.375 +2025-08-21 01:08:52 - INFO - [bea72135-e169-4ffa-8a4d-711a63d29de7] Inference time: 28.39 seconds, CPU usage: 36.8%, CPU core utilization: [30.2, 35.9, 49.2, 32.1] +2025-08-21 01:08:52 - INFO - [bea72135-e169-4ffa-8a4d-711a63d29de7] Cleaned up temporary frame directory: temp_videos/bea72135-e169-4ffa-8a4d-711a63d29de7 +2025-08-21 01:08:52 - INFO - [0225d6ec-246f-4a57-8cd4-937fc512a6da] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_018.mp4' +2025-08-21 01:08:52 - INFO - [0225d6ec-246f-4a57-8cd4-937fc512a6da] Video saved to temporary file: temp_videos/0225d6ec-246f-4a57-8cd4-937fc512a6da.mp4 +2025-08-21 01:08:52 - INFO - [0225d6ec-246f-4a57-8cd4-937fc512a6da] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:08:57 - INFO - [0225d6ec-246f-4a57-8cd4-937fc512a6da] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:08:57 - INFO - [0225d6ec-246f-4a57-8cd4-937fc512a6da] 30 frames saved to temp_videos/0225d6ec-246f-4a57-8cd4-937fc512a6da +2025-08-21 01:09:10 - INFO - vision_config is None, using default vision config +2025-08-21 01:09:25 - INFO - Tokens per second: 7.796685733470381, Peak GPU memory MB: 11824.375 +2025-08-21 01:09:25 - INFO - [0225d6ec-246f-4a57-8cd4-937fc512a6da] Inference time: 32.85 seconds, CPU usage: 36.0%, CPU core utilization: [47.5, 17.9, 27.3, 51.2] +2025-08-21 01:09:25 - INFO - [0225d6ec-246f-4a57-8cd4-937fc512a6da] Cleaned up temporary frame directory: temp_videos/0225d6ec-246f-4a57-8cd4-937fc512a6da +2025-08-21 01:09:25 - INFO - [a4d435b1-e6a4-4c53-883e-4df02b912d3a] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_019.mp4' +2025-08-21 01:09:25 - INFO - [a4d435b1-e6a4-4c53-883e-4df02b912d3a] Video saved to temporary file: temp_videos/a4d435b1-e6a4-4c53-883e-4df02b912d3a.mp4 +2025-08-21 01:09:25 - INFO - [a4d435b1-e6a4-4c53-883e-4df02b912d3a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:09:30 - INFO - [a4d435b1-e6a4-4c53-883e-4df02b912d3a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:09:30 - INFO - [a4d435b1-e6a4-4c53-883e-4df02b912d3a] 30 frames saved to temp_videos/a4d435b1-e6a4-4c53-883e-4df02b912d3a +2025-08-21 01:09:43 - INFO - vision_config is None, using default vision config +2025-08-21 01:09:59 - INFO - Tokens per second: 8.04906132671884, Peak GPU memory MB: 11824.375 +2025-08-21 01:09:59 - INFO - [a4d435b1-e6a4-4c53-883e-4df02b912d3a] Inference time: 33.57 seconds, CPU usage: 35.7%, CPU core utilization: [28.8, 35.7, 39.0, 39.3] +2025-08-21 01:09:59 - INFO - [a4d435b1-e6a4-4c53-883e-4df02b912d3a] Cleaned up temporary frame directory: temp_videos/a4d435b1-e6a4-4c53-883e-4df02b912d3a +2025-08-21 01:09:59 - INFO - [cb7f092b-c376-4f47-80ca-804b08b972a4] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_020.mp4' +2025-08-21 01:09:59 - INFO - [cb7f092b-c376-4f47-80ca-804b08b972a4] Video saved to temporary file: temp_videos/cb7f092b-c376-4f47-80ca-804b08b972a4.mp4 +2025-08-21 01:09:59 - INFO - [cb7f092b-c376-4f47-80ca-804b08b972a4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:10:04 - INFO - [cb7f092b-c376-4f47-80ca-804b08b972a4] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:10:04 - INFO - [cb7f092b-c376-4f47-80ca-804b08b972a4] 30 frames saved to temp_videos/cb7f092b-c376-4f47-80ca-804b08b972a4 +2025-08-21 01:10:16 - INFO - vision_config is None, using default vision config +2025-08-21 01:10:29 - INFO - Tokens per second: 6.551982232700224, Peak GPU memory MB: 11824.375 +2025-08-21 01:10:29 - INFO - [cb7f092b-c376-4f47-80ca-804b08b972a4] Inference time: 30.41 seconds, CPU usage: 36.7%, CPU core utilization: [35.2, 27.6, 62.1, 22.0] +2025-08-21 01:10:29 - INFO - [cb7f092b-c376-4f47-80ca-804b08b972a4] Cleaned up temporary frame directory: temp_videos/cb7f092b-c376-4f47-80ca-804b08b972a4 +2025-08-21 01:10:29 - INFO - [2e42703b-a52c-49f8-a396-ae02062d1c39] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_021.mp4' +2025-08-21 01:10:29 - INFO - [2e42703b-a52c-49f8-a396-ae02062d1c39] Video saved to temporary file: temp_videos/2e42703b-a52c-49f8-a396-ae02062d1c39.mp4 +2025-08-21 01:10:29 - INFO - [2e42703b-a52c-49f8-a396-ae02062d1c39] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:10:34 - INFO - [2e42703b-a52c-49f8-a396-ae02062d1c39] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:10:34 - INFO - [2e42703b-a52c-49f8-a396-ae02062d1c39] 30 frames saved to temp_videos/2e42703b-a52c-49f8-a396-ae02062d1c39 +2025-08-21 01:10:47 - INFO - vision_config is None, using default vision config +2025-08-21 01:10:58 - INFO - Tokens per second: 5.068557863754886, Peak GPU memory MB: 11824.375 +2025-08-21 01:10:58 - INFO - [2e42703b-a52c-49f8-a396-ae02062d1c39] Inference time: 28.44 seconds, CPU usage: 37.4%, CPU core utilization: [24.8, 47.0, 16.6, 61.0] +2025-08-21 01:10:58 - INFO - [2e42703b-a52c-49f8-a396-ae02062d1c39] Cleaned up temporary frame directory: temp_videos/2e42703b-a52c-49f8-a396-ae02062d1c39 +2025-08-21 01:10:58 - INFO - [b5420571-277f-43c0-ba2e-141c5b252721] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_022.mp4' +2025-08-21 01:10:58 - INFO - [b5420571-277f-43c0-ba2e-141c5b252721] Video saved to temporary file: temp_videos/b5420571-277f-43c0-ba2e-141c5b252721.mp4 +2025-08-21 01:10:58 - INFO - [b5420571-277f-43c0-ba2e-141c5b252721] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:11:02 - INFO - [b5420571-277f-43c0-ba2e-141c5b252721] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:11:02 - INFO - [b5420571-277f-43c0-ba2e-141c5b252721] 30 frames saved to temp_videos/b5420571-277f-43c0-ba2e-141c5b252721 +2025-08-21 01:11:15 - INFO - vision_config is None, using default vision config +2025-08-21 01:11:26 - INFO - Tokens per second: 4.765763711580632, Peak GPU memory MB: 11824.375 +2025-08-21 01:11:26 - INFO - [b5420571-277f-43c0-ba2e-141c5b252721] Inference time: 27.98 seconds, CPU usage: 37.6%, CPU core utilization: [43.1, 34.8, 55.9, 16.7] +2025-08-21 01:11:26 - INFO - [b5420571-277f-43c0-ba2e-141c5b252721] Cleaned up temporary frame directory: temp_videos/b5420571-277f-43c0-ba2e-141c5b252721 +2025-08-21 01:11:26 - INFO - [0df499a8-8a08-4b6b-a8bd-63fd84cde688] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_023.mp4' +2025-08-21 01:11:26 - INFO - [0df499a8-8a08-4b6b-a8bd-63fd84cde688] Video saved to temporary file: temp_videos/0df499a8-8a08-4b6b-a8bd-63fd84cde688.mp4 +2025-08-21 01:11:26 - INFO - [0df499a8-8a08-4b6b-a8bd-63fd84cde688] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:11:30 - INFO - [0df499a8-8a08-4b6b-a8bd-63fd84cde688] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:11:31 - INFO - [0df499a8-8a08-4b6b-a8bd-63fd84cde688] 30 frames saved to temp_videos/0df499a8-8a08-4b6b-a8bd-63fd84cde688 +2025-08-21 01:11:43 - INFO - vision_config is None, using default vision config +2025-08-21 01:11:58 - INFO - Tokens per second: 7.483257212457778, Peak GPU memory MB: 11824.375 +2025-08-21 01:11:58 - INFO - [0df499a8-8a08-4b6b-a8bd-63fd84cde688] Inference time: 32.23 seconds, CPU usage: 36.1%, CPU core utilization: [26.1, 27.6, 47.6, 43.3] +2025-08-21 01:11:58 - INFO - [0df499a8-8a08-4b6b-a8bd-63fd84cde688] Cleaned up temporary frame directory: temp_videos/0df499a8-8a08-4b6b-a8bd-63fd84cde688 +2025-08-21 01:11:58 - INFO - [1e06a7b7-be5d-449e-b3a5-84f5a7f53e2a] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_024.mp4' +2025-08-21 01:11:58 - INFO - [1e06a7b7-be5d-449e-b3a5-84f5a7f53e2a] Video saved to temporary file: temp_videos/1e06a7b7-be5d-449e-b3a5-84f5a7f53e2a.mp4 +2025-08-21 01:11:58 - INFO - [1e06a7b7-be5d-449e-b3a5-84f5a7f53e2a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:12:03 - INFO - [1e06a7b7-be5d-449e-b3a5-84f5a7f53e2a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:12:03 - INFO - [1e06a7b7-be5d-449e-b3a5-84f5a7f53e2a] 30 frames saved to temp_videos/1e06a7b7-be5d-449e-b3a5-84f5a7f53e2a +2025-08-21 01:12:16 - INFO - vision_config is None, using default vision config +2025-08-21 01:12:25 - INFO - Tokens per second: 3.676425320411625, Peak GPU memory MB: 11824.375 +2025-08-21 01:12:25 - INFO - [1e06a7b7-be5d-449e-b3a5-84f5a7f53e2a] Inference time: 27.02 seconds, CPU usage: 38.5%, CPU core utilization: [40.1, 28.5, 57.3, 28.2] +2025-08-21 01:12:25 - INFO - [1e06a7b7-be5d-449e-b3a5-84f5a7f53e2a] Cleaned up temporary frame directory: temp_videos/1e06a7b7-be5d-449e-b3a5-84f5a7f53e2a +2025-08-21 01:12:25 - INFO - [5794ceef-3e1b-4291-b263-2b236146168a] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_025.mp4' +2025-08-21 01:12:25 - INFO - [5794ceef-3e1b-4291-b263-2b236146168a] Video saved to temporary file: temp_videos/5794ceef-3e1b-4291-b263-2b236146168a.mp4 +2025-08-21 01:12:25 - INFO - [5794ceef-3e1b-4291-b263-2b236146168a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:12:30 - INFO - [5794ceef-3e1b-4291-b263-2b236146168a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:12:30 - INFO - [5794ceef-3e1b-4291-b263-2b236146168a] 30 frames saved to temp_videos/5794ceef-3e1b-4291-b263-2b236146168a +2025-08-21 01:12:43 - INFO - vision_config is None, using default vision config +2025-08-21 01:12:53 - INFO - Tokens per second: 5.01668206513031, Peak GPU memory MB: 11824.375 +2025-08-21 01:12:53 - INFO - [5794ceef-3e1b-4291-b263-2b236146168a] Inference time: 28.36 seconds, CPU usage: 37.5%, CPU core utilization: [29.8, 41.1, 41.8, 37.3] +2025-08-21 01:12:53 - INFO - [5794ceef-3e1b-4291-b263-2b236146168a] Cleaned up temporary frame directory: temp_videos/5794ceef-3e1b-4291-b263-2b236146168a +2025-08-21 01:12:53 - INFO - [686109f9-7315-4a90-8563-98c12607d0a8] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_026.mp4' +2025-08-21 01:12:53 - INFO - [686109f9-7315-4a90-8563-98c12607d0a8] Video saved to temporary file: temp_videos/686109f9-7315-4a90-8563-98c12607d0a8.mp4 +2025-08-21 01:12:53 - INFO - [686109f9-7315-4a90-8563-98c12607d0a8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:12:58 - INFO - [686109f9-7315-4a90-8563-98c12607d0a8] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:12:58 - INFO - [686109f9-7315-4a90-8563-98c12607d0a8] 30 frames saved to temp_videos/686109f9-7315-4a90-8563-98c12607d0a8 +2025-08-21 01:13:11 - INFO - vision_config is None, using default vision config +2025-08-21 01:13:23 - INFO - Tokens per second: 6.198040985676184, Peak GPU memory MB: 11824.375 +2025-08-21 01:13:23 - INFO - [686109f9-7315-4a90-8563-98c12607d0a8] Inference time: 29.83 seconds, CPU usage: 37.0%, CPU core utilization: [26.8, 32.5, 30.5, 58.1] +2025-08-21 01:13:23 - INFO - [686109f9-7315-4a90-8563-98c12607d0a8] Cleaned up temporary frame directory: temp_videos/686109f9-7315-4a90-8563-98c12607d0a8 +2025-08-21 01:13:23 - INFO - [3b5a6bbe-89d5-4405-82ff-ea34ddf2f53c] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_027.mp4' +2025-08-21 01:13:23 - INFO - [3b5a6bbe-89d5-4405-82ff-ea34ddf2f53c] Video saved to temporary file: temp_videos/3b5a6bbe-89d5-4405-82ff-ea34ddf2f53c.mp4 +2025-08-21 01:13:23 - INFO - [3b5a6bbe-89d5-4405-82ff-ea34ddf2f53c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:13:28 - INFO - [3b5a6bbe-89d5-4405-82ff-ea34ddf2f53c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:13:28 - INFO - [3b5a6bbe-89d5-4405-82ff-ea34ddf2f53c] 30 frames saved to temp_videos/3b5a6bbe-89d5-4405-82ff-ea34ddf2f53c +2025-08-21 01:13:41 - INFO - vision_config is None, using default vision config +2025-08-21 01:13:54 - INFO - Tokens per second: 6.7633887422454855, Peak GPU memory MB: 11824.375 +2025-08-21 01:13:54 - INFO - [3b5a6bbe-89d5-4405-82ff-ea34ddf2f53c] Inference time: 30.77 seconds, CPU usage: 36.5%, CPU core utilization: [64.3, 25.4, 39.5, 16.6] +2025-08-21 01:13:54 - INFO - [3b5a6bbe-89d5-4405-82ff-ea34ddf2f53c] Cleaned up temporary frame directory: temp_videos/3b5a6bbe-89d5-4405-82ff-ea34ddf2f53c +2025-08-21 01:13:54 - INFO - [c0db1c86-c4f5-4f7a-92ec-0b0631bde80c] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_028.mp4' +2025-08-21 01:13:54 - INFO - [c0db1c86-c4f5-4f7a-92ec-0b0631bde80c] Video saved to temporary file: temp_videos/c0db1c86-c4f5-4f7a-92ec-0b0631bde80c.mp4 +2025-08-21 01:13:54 - INFO - [c0db1c86-c4f5-4f7a-92ec-0b0631bde80c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:13:59 - INFO - [c0db1c86-c4f5-4f7a-92ec-0b0631bde80c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:13:59 - INFO - [c0db1c86-c4f5-4f7a-92ec-0b0631bde80c] 30 frames saved to temp_videos/c0db1c86-c4f5-4f7a-92ec-0b0631bde80c +2025-08-21 01:14:12 - INFO - vision_config is None, using default vision config +2025-08-21 01:14:24 - INFO - Tokens per second: 6.241116142632914, Peak GPU memory MB: 11824.375 +2025-08-21 01:14:24 - INFO - [c0db1c86-c4f5-4f7a-92ec-0b0631bde80c] Inference time: 29.90 seconds, CPU usage: 36.9%, CPU core utilization: [52.1, 24.6, 27.8, 43.1] +2025-08-21 01:14:24 - INFO - [c0db1c86-c4f5-4f7a-92ec-0b0631bde80c] Cleaned up temporary frame directory: temp_videos/c0db1c86-c4f5-4f7a-92ec-0b0631bde80c +2025-08-21 01:14:24 - INFO - [bb36dcfa-9c43-4bf7-8c8e-becac106dbbb] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_029.mp4' +2025-08-21 01:14:24 - INFO - [bb36dcfa-9c43-4bf7-8c8e-becac106dbbb] Video saved to temporary file: temp_videos/bb36dcfa-9c43-4bf7-8c8e-becac106dbbb.mp4 +2025-08-21 01:14:24 - INFO - [bb36dcfa-9c43-4bf7-8c8e-becac106dbbb] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:14:29 - INFO - [bb36dcfa-9c43-4bf7-8c8e-becac106dbbb] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:14:29 - INFO - [bb36dcfa-9c43-4bf7-8c8e-becac106dbbb] 30 frames saved to temp_videos/bb36dcfa-9c43-4bf7-8c8e-becac106dbbb +2025-08-21 01:14:42 - INFO - vision_config is None, using default vision config +2025-08-21 01:14:53 - INFO - Tokens per second: 5.427872993526753, Peak GPU memory MB: 11824.375 +2025-08-21 01:14:53 - INFO - [bb36dcfa-9c43-4bf7-8c8e-becac106dbbb] Inference time: 28.90 seconds, CPU usage: 36.8%, CPU core utilization: [58.1, 25.7, 27.4, 36.1] +2025-08-21 01:14:53 - INFO - [bb36dcfa-9c43-4bf7-8c8e-becac106dbbb] Cleaned up temporary frame directory: temp_videos/bb36dcfa-9c43-4bf7-8c8e-becac106dbbb +2025-08-21 01:14:53 - INFO - [78a73f26-9f35-4eff-bd02-6c440580ce76] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_030.mp4' +2025-08-21 01:14:53 - INFO - [78a73f26-9f35-4eff-bd02-6c440580ce76] Video saved to temporary file: temp_videos/78a73f26-9f35-4eff-bd02-6c440580ce76.mp4 +2025-08-21 01:14:53 - INFO - [78a73f26-9f35-4eff-bd02-6c440580ce76] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:14:58 - INFO - [78a73f26-9f35-4eff-bd02-6c440580ce76] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:14:58 - INFO - [78a73f26-9f35-4eff-bd02-6c440580ce76] 30 frames saved to temp_videos/78a73f26-9f35-4eff-bd02-6c440580ce76 +2025-08-21 01:15:11 - INFO - vision_config is None, using default vision config +2025-08-21 01:15:24 - INFO - Tokens per second: 6.759735939024082, Peak GPU memory MB: 11824.375 +2025-08-21 01:15:24 - INFO - [78a73f26-9f35-4eff-bd02-6c440580ce76] Inference time: 30.85 seconds, CPU usage: 36.6%, CPU core utilization: [41.3, 20.6, 31.9, 52.4] +2025-08-21 01:15:24 - INFO - [78a73f26-9f35-4eff-bd02-6c440580ce76] Cleaned up temporary frame directory: temp_videos/78a73f26-9f35-4eff-bd02-6c440580ce76 +2025-08-21 01:15:24 - INFO - [59928551-1922-42e9-b7ef-b8f27f8d44a7] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_031.mp4' +2025-08-21 01:15:24 - INFO - [59928551-1922-42e9-b7ef-b8f27f8d44a7] Video saved to temporary file: temp_videos/59928551-1922-42e9-b7ef-b8f27f8d44a7.mp4 +2025-08-21 01:15:24 - INFO - [59928551-1922-42e9-b7ef-b8f27f8d44a7] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:15:28 - INFO - [59928551-1922-42e9-b7ef-b8f27f8d44a7] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:15:28 - INFO - [59928551-1922-42e9-b7ef-b8f27f8d44a7] 30 frames saved to temp_videos/59928551-1922-42e9-b7ef-b8f27f8d44a7 +2025-08-21 01:15:41 - INFO - vision_config is None, using default vision config +2025-08-21 01:15:52 - INFO - Tokens per second: 5.539399356957513, Peak GPU memory MB: 11824.375 +2025-08-21 01:15:52 - INFO - [59928551-1922-42e9-b7ef-b8f27f8d44a7] Inference time: 28.92 seconds, CPU usage: 37.0%, CPU core utilization: [36.2, 29.1, 66.1, 16.6] +2025-08-21 01:15:53 - INFO - [59928551-1922-42e9-b7ef-b8f27f8d44a7] Cleaned up temporary frame directory: temp_videos/59928551-1922-42e9-b7ef-b8f27f8d44a7 +2025-08-21 01:15:53 - INFO - [824a7ab4-629c-45f5-9a3d-4a4db67f847f] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_032.mp4' +2025-08-21 01:15:53 - INFO - [824a7ab4-629c-45f5-9a3d-4a4db67f847f] Video saved to temporary file: temp_videos/824a7ab4-629c-45f5-9a3d-4a4db67f847f.mp4 +2025-08-21 01:15:53 - INFO - [824a7ab4-629c-45f5-9a3d-4a4db67f847f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:15:57 - INFO - [824a7ab4-629c-45f5-9a3d-4a4db67f847f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:15:57 - INFO - [824a7ab4-629c-45f5-9a3d-4a4db67f847f] 30 frames saved to temp_videos/824a7ab4-629c-45f5-9a3d-4a4db67f847f +2025-08-21 01:16:10 - INFO - vision_config is None, using default vision config +2025-08-21 01:16:19 - INFO - Tokens per second: 3.2576537369296608, Peak GPU memory MB: 11824.375 +2025-08-21 01:16:19 - INFO - [824a7ab4-629c-45f5-9a3d-4a4db67f847f] Inference time: 26.58 seconds, CPU usage: 38.1%, CPU core utilization: [48.7, 36.2, 31.6, 36.0] +2025-08-21 01:16:19 - INFO - [824a7ab4-629c-45f5-9a3d-4a4db67f847f] Cleaned up temporary frame directory: temp_videos/824a7ab4-629c-45f5-9a3d-4a4db67f847f +2025-08-21 01:16:19 - INFO - [9483b45f-591e-4e30-b51b-94a81dec9839] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_033.mp4' +2025-08-21 01:16:19 - INFO - [9483b45f-591e-4e30-b51b-94a81dec9839] Video saved to temporary file: temp_videos/9483b45f-591e-4e30-b51b-94a81dec9839.mp4 +2025-08-21 01:16:19 - INFO - [9483b45f-591e-4e30-b51b-94a81dec9839] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:16:24 - INFO - [9483b45f-591e-4e30-b51b-94a81dec9839] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:16:24 - INFO - [9483b45f-591e-4e30-b51b-94a81dec9839] 30 frames saved to temp_videos/9483b45f-591e-4e30-b51b-94a81dec9839 +2025-08-21 01:16:37 - INFO - vision_config is None, using default vision config +2025-08-21 01:16:49 - INFO - Tokens per second: 6.511908523166135, Peak GPU memory MB: 11824.375 +2025-08-21 01:16:49 - INFO - [9483b45f-591e-4e30-b51b-94a81dec9839] Inference time: 30.29 seconds, CPU usage: 36.5%, CPU core utilization: [55.4, 25.4, 48.9, 16.2] +2025-08-21 01:16:49 - INFO - [9483b45f-591e-4e30-b51b-94a81dec9839] Cleaned up temporary frame directory: temp_videos/9483b45f-591e-4e30-b51b-94a81dec9839 +2025-08-21 01:16:49 - INFO - [8229188c-0cc1-4717-8e19-43ece6e413b5] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_034.mp4' +2025-08-21 01:16:49 - INFO - [8229188c-0cc1-4717-8e19-43ece6e413b5] Video saved to temporary file: temp_videos/8229188c-0cc1-4717-8e19-43ece6e413b5.mp4 +2025-08-21 01:16:49 - INFO - [8229188c-0cc1-4717-8e19-43ece6e413b5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:16:54 - INFO - [8229188c-0cc1-4717-8e19-43ece6e413b5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:16:54 - INFO - [8229188c-0cc1-4717-8e19-43ece6e413b5] 30 frames saved to temp_videos/8229188c-0cc1-4717-8e19-43ece6e413b5 +2025-08-21 01:17:07 - INFO - vision_config is None, using default vision config +2025-08-21 01:17:18 - INFO - Tokens per second: 5.322139736625115, Peak GPU memory MB: 11824.375 +2025-08-21 01:17:18 - INFO - [8229188c-0cc1-4717-8e19-43ece6e413b5] Inference time: 28.59 seconds, CPU usage: 37.4%, CPU core utilization: [41.1, 43.3, 45.5, 19.4] +2025-08-21 01:17:18 - INFO - [8229188c-0cc1-4717-8e19-43ece6e413b5] Cleaned up temporary frame directory: temp_videos/8229188c-0cc1-4717-8e19-43ece6e413b5 +2025-08-21 01:17:18 - INFO - [00df263f-0a55-4646-a5f1-7392cbd3a66e] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_035.mp4' +2025-08-21 01:17:18 - INFO - [00df263f-0a55-4646-a5f1-7392cbd3a66e] Video saved to temporary file: temp_videos/00df263f-0a55-4646-a5f1-7392cbd3a66e.mp4 +2025-08-21 01:17:18 - INFO - [00df263f-0a55-4646-a5f1-7392cbd3a66e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:17:23 - INFO - [00df263f-0a55-4646-a5f1-7392cbd3a66e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:17:23 - INFO - [00df263f-0a55-4646-a5f1-7392cbd3a66e] 30 frames saved to temp_videos/00df263f-0a55-4646-a5f1-7392cbd3a66e +2025-08-21 01:17:36 - INFO - vision_config is None, using default vision config +2025-08-21 01:17:44 - INFO - Tokens per second: 2.001110574040108, Peak GPU memory MB: 11824.375 +2025-08-21 01:17:44 - INFO - [00df263f-0a55-4646-a5f1-7392cbd3a66e] Inference time: 25.77 seconds, CPU usage: 38.4%, CPU core utilization: [35.6, 36.8, 32.8, 48.4] +2025-08-21 01:17:44 - INFO - [00df263f-0a55-4646-a5f1-7392cbd3a66e] Cleaned up temporary frame directory: temp_videos/00df263f-0a55-4646-a5f1-7392cbd3a66e +2025-08-21 01:17:44 - INFO - [56354734-256f-4dd4-a7ca-33f9f37b588a] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_036.mp4' +2025-08-21 01:17:44 - INFO - [56354734-256f-4dd4-a7ca-33f9f37b588a] Video saved to temporary file: temp_videos/56354734-256f-4dd4-a7ca-33f9f37b588a.mp4 +2025-08-21 01:17:44 - INFO - [56354734-256f-4dd4-a7ca-33f9f37b588a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:17:49 - INFO - [56354734-256f-4dd4-a7ca-33f9f37b588a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:17:49 - INFO - [56354734-256f-4dd4-a7ca-33f9f37b588a] 30 frames saved to temp_videos/56354734-256f-4dd4-a7ca-33f9f37b588a +2025-08-21 01:18:02 - INFO - vision_config is None, using default vision config +2025-08-21 01:18:11 - INFO - Tokens per second: 4.287825767821965, Peak GPU memory MB: 11824.375 +2025-08-21 01:18:11 - INFO - [56354734-256f-4dd4-a7ca-33f9f37b588a] Inference time: 27.47 seconds, CPU usage: 37.6%, CPU core utilization: [18.0, 46.2, 68.9, 17.6] +2025-08-21 01:18:11 - INFO - [56354734-256f-4dd4-a7ca-33f9f37b588a] Cleaned up temporary frame directory: temp_videos/56354734-256f-4dd4-a7ca-33f9f37b588a +2025-08-21 01:18:11 - INFO - [43fc9b52-3741-493c-b317-62cd85256985] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_037.mp4' +2025-08-21 01:18:11 - INFO - [43fc9b52-3741-493c-b317-62cd85256985] Video saved to temporary file: temp_videos/43fc9b52-3741-493c-b317-62cd85256985.mp4 +2025-08-21 01:18:11 - INFO - [43fc9b52-3741-493c-b317-62cd85256985] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:18:16 - INFO - [43fc9b52-3741-493c-b317-62cd85256985] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:18:16 - INFO - [43fc9b52-3741-493c-b317-62cd85256985] 30 frames saved to temp_videos/43fc9b52-3741-493c-b317-62cd85256985 +2025-08-21 01:18:29 - INFO - vision_config is None, using default vision config +2025-08-21 01:18:40 - INFO - Tokens per second: 5.201636019010958, Peak GPU memory MB: 11824.375 +2025-08-21 01:18:40 - INFO - [43fc9b52-3741-493c-b317-62cd85256985] Inference time: 28.50 seconds, CPU usage: 37.4%, CPU core utilization: [36.0, 34.1, 41.3, 38.1] +2025-08-21 01:18:40 - INFO - [43fc9b52-3741-493c-b317-62cd85256985] Cleaned up temporary frame directory: temp_videos/43fc9b52-3741-493c-b317-62cd85256985 +2025-08-21 01:18:40 - INFO - [bb21c0a9-84dd-4b05-8543-a9d4b52958ba] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_038.mp4' +2025-08-21 01:18:40 - INFO - [bb21c0a9-84dd-4b05-8543-a9d4b52958ba] Video saved to temporary file: temp_videos/bb21c0a9-84dd-4b05-8543-a9d4b52958ba.mp4 +2025-08-21 01:18:40 - INFO - [bb21c0a9-84dd-4b05-8543-a9d4b52958ba] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:18:45 - INFO - [bb21c0a9-84dd-4b05-8543-a9d4b52958ba] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:18:45 - INFO - [bb21c0a9-84dd-4b05-8543-a9d4b52958ba] 30 frames saved to temp_videos/bb21c0a9-84dd-4b05-8543-a9d4b52958ba +2025-08-21 01:18:58 - INFO - vision_config is None, using default vision config +2025-08-21 01:19:09 - INFO - Tokens per second: 5.959308976482591, Peak GPU memory MB: 11824.375 +2025-08-21 01:19:09 - INFO - [bb21c0a9-84dd-4b05-8543-a9d4b52958ba] Inference time: 29.47 seconds, CPU usage: 36.6%, CPU core utilization: [47.9, 26.7, 42.6, 29.1] +2025-08-21 01:19:09 - INFO - [bb21c0a9-84dd-4b05-8543-a9d4b52958ba] Cleaned up temporary frame directory: temp_videos/bb21c0a9-84dd-4b05-8543-a9d4b52958ba +2025-08-21 01:19:09 - INFO - [c1c756cf-8d88-40f1-99d6-35014bca5417] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_039.mp4' +2025-08-21 01:19:09 - INFO - [c1c756cf-8d88-40f1-99d6-35014bca5417] Video saved to temporary file: temp_videos/c1c756cf-8d88-40f1-99d6-35014bca5417.mp4 +2025-08-21 01:19:09 - INFO - [c1c756cf-8d88-40f1-99d6-35014bca5417] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:19:14 - INFO - [c1c756cf-8d88-40f1-99d6-35014bca5417] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:19:14 - INFO - [c1c756cf-8d88-40f1-99d6-35014bca5417] 30 frames saved to temp_videos/c1c756cf-8d88-40f1-99d6-35014bca5417 +2025-08-21 01:19:27 - INFO - vision_config is None, using default vision config +2025-08-21 01:19:36 - INFO - Tokens per second: 3.3460227628996955, Peak GPU memory MB: 11824.375 +2025-08-21 01:19:36 - INFO - [c1c756cf-8d88-40f1-99d6-35014bca5417] Inference time: 26.83 seconds, CPU usage: 38.0%, CPU core utilization: [50.8, 48.6, 31.4, 21.4] +2025-08-21 01:19:36 - INFO - [c1c756cf-8d88-40f1-99d6-35014bca5417] Cleaned up temporary frame directory: temp_videos/c1c756cf-8d88-40f1-99d6-35014bca5417 +2025-08-21 01:19:36 - INFO - [e408569f-7a5d-4851-961e-1b0408acf6fd] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_040.mp4' +2025-08-21 01:19:36 - INFO - [e408569f-7a5d-4851-961e-1b0408acf6fd] Video saved to temporary file: temp_videos/e408569f-7a5d-4851-961e-1b0408acf6fd.mp4 +2025-08-21 01:19:36 - INFO - [e408569f-7a5d-4851-961e-1b0408acf6fd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:19:41 - INFO - [e408569f-7a5d-4851-961e-1b0408acf6fd] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:19:41 - INFO - [e408569f-7a5d-4851-961e-1b0408acf6fd] 30 frames saved to temp_videos/e408569f-7a5d-4851-961e-1b0408acf6fd +2025-08-21 01:19:54 - INFO - vision_config is None, using default vision config +2025-08-21 01:20:03 - INFO - Tokens per second: 3.758188071762749, Peak GPU memory MB: 11824.375 +2025-08-21 01:20:03 - INFO - [e408569f-7a5d-4851-961e-1b0408acf6fd] Inference time: 27.11 seconds, CPU usage: 37.6%, CPU core utilization: [32.3, 62.0, 32.1, 23.9] +2025-08-21 01:20:03 - INFO - [e408569f-7a5d-4851-961e-1b0408acf6fd] Cleaned up temporary frame directory: temp_videos/e408569f-7a5d-4851-961e-1b0408acf6fd +2025-08-21 01:20:03 - INFO - [5dac0a79-3673-4ab1-b2ca-cefc83712b60] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_041.mp4' +2025-08-21 01:20:03 - INFO - [5dac0a79-3673-4ab1-b2ca-cefc83712b60] Video saved to temporary file: temp_videos/5dac0a79-3673-4ab1-b2ca-cefc83712b60.mp4 +2025-08-21 01:20:03 - INFO - [5dac0a79-3673-4ab1-b2ca-cefc83712b60] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:20:08 - INFO - [5dac0a79-3673-4ab1-b2ca-cefc83712b60] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:20:08 - INFO - [5dac0a79-3673-4ab1-b2ca-cefc83712b60] 30 frames saved to temp_videos/5dac0a79-3673-4ab1-b2ca-cefc83712b60 +2025-08-21 01:20:21 - INFO - vision_config is None, using default vision config +2025-08-21 01:20:30 - INFO - Tokens per second: 3.839752750295925, Peak GPU memory MB: 11824.375 +2025-08-21 01:20:30 - INFO - [5dac0a79-3673-4ab1-b2ca-cefc83712b60] Inference time: 27.12 seconds, CPU usage: 37.8%, CPU core utilization: [28.4, 42.0, 34.0, 46.9] +2025-08-21 01:20:30 - INFO - [5dac0a79-3673-4ab1-b2ca-cefc83712b60] Cleaned up temporary frame directory: temp_videos/5dac0a79-3673-4ab1-b2ca-cefc83712b60 +2025-08-21 01:20:30 - INFO - [92c0e821-54b8-4c96-801a-be04166c4502] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_042.mp4' +2025-08-21 01:20:30 - INFO - [92c0e821-54b8-4c96-801a-be04166c4502] Video saved to temporary file: temp_videos/92c0e821-54b8-4c96-801a-be04166c4502.mp4 +2025-08-21 01:20:30 - INFO - [92c0e821-54b8-4c96-801a-be04166c4502] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:20:35 - INFO - [92c0e821-54b8-4c96-801a-be04166c4502] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:20:35 - INFO - [92c0e821-54b8-4c96-801a-be04166c4502] 30 frames saved to temp_videos/92c0e821-54b8-4c96-801a-be04166c4502 +2025-08-21 01:20:48 - INFO - vision_config is None, using default vision config +2025-08-21 01:21:01 - INFO - Tokens per second: 6.5598252890181605, Peak GPU memory MB: 11824.375 +2025-08-21 01:21:01 - INFO - [92c0e821-54b8-4c96-801a-be04166c4502] Inference time: 30.37 seconds, CPU usage: 36.1%, CPU core utilization: [20.5, 62.7, 16.5, 44.7] +2025-08-21 01:21:01 - INFO - [92c0e821-54b8-4c96-801a-be04166c4502] Cleaned up temporary frame directory: temp_videos/92c0e821-54b8-4c96-801a-be04166c4502 +2025-08-21 01:21:01 - INFO - [f542ced9-0803-492e-a5c3-1a8cf04f1129] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_043.mp4' +2025-08-21 01:21:01 - INFO - [f542ced9-0803-492e-a5c3-1a8cf04f1129] Video saved to temporary file: temp_videos/f542ced9-0803-492e-a5c3-1a8cf04f1129.mp4 +2025-08-21 01:21:01 - INFO - [f542ced9-0803-492e-a5c3-1a8cf04f1129] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:21:06 - INFO - [f542ced9-0803-492e-a5c3-1a8cf04f1129] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:21:06 - INFO - [f542ced9-0803-492e-a5c3-1a8cf04f1129] 30 frames saved to temp_videos/f542ced9-0803-492e-a5c3-1a8cf04f1129 +2025-08-21 01:21:19 - INFO - vision_config is None, using default vision config +2025-08-21 01:21:31 - INFO - Tokens per second: 6.379927791197094, Peak GPU memory MB: 11824.375 +2025-08-21 01:21:31 - INFO - [f542ced9-0803-492e-a5c3-1a8cf04f1129] Inference time: 30.11 seconds, CPU usage: 36.8%, CPU core utilization: [27.4, 48.1, 37.8, 33.9] +2025-08-21 01:21:31 - INFO - [f542ced9-0803-492e-a5c3-1a8cf04f1129] Cleaned up temporary frame directory: temp_videos/f542ced9-0803-492e-a5c3-1a8cf04f1129 +2025-08-21 01:21:31 - INFO - [01cce6ea-c917-49ae-b644-91a34b7204c5] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_044.mp4' +2025-08-21 01:21:31 - INFO - [01cce6ea-c917-49ae-b644-91a34b7204c5] Video saved to temporary file: temp_videos/01cce6ea-c917-49ae-b644-91a34b7204c5.mp4 +2025-08-21 01:21:31 - INFO - [01cce6ea-c917-49ae-b644-91a34b7204c5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:21:36 - INFO - [01cce6ea-c917-49ae-b644-91a34b7204c5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:21:36 - INFO - [01cce6ea-c917-49ae-b644-91a34b7204c5] 30 frames saved to temp_videos/01cce6ea-c917-49ae-b644-91a34b7204c5 +2025-08-21 01:21:49 - INFO - vision_config is None, using default vision config +2025-08-21 01:21:58 - INFO - Tokens per second: 3.7502463323747084, Peak GPU memory MB: 11824.375 +2025-08-21 01:21:58 - INFO - [01cce6ea-c917-49ae-b644-91a34b7204c5] Inference time: 27.08 seconds, CPU usage: 48.6%, CPU core utilization: [82.7, 37.0, 40.9, 33.6] +2025-08-21 01:21:58 - INFO - [01cce6ea-c917-49ae-b644-91a34b7204c5] Cleaned up temporary frame directory: temp_videos/01cce6ea-c917-49ae-b644-91a34b7204c5 +2025-08-21 01:21:58 - INFO - [9738fd4f-d14a-4967-bdb7-b1c9156add2a] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_045.mp4' +2025-08-21 01:21:58 - INFO - [9738fd4f-d14a-4967-bdb7-b1c9156add2a] Video saved to temporary file: temp_videos/9738fd4f-d14a-4967-bdb7-b1c9156add2a.mp4 +2025-08-21 01:21:58 - INFO - [9738fd4f-d14a-4967-bdb7-b1c9156add2a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:22:06 - INFO - [9738fd4f-d14a-4967-bdb7-b1c9156add2a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:22:06 - INFO - [9738fd4f-d14a-4967-bdb7-b1c9156add2a] 30 frames saved to temp_videos/9738fd4f-d14a-4967-bdb7-b1c9156add2a +2025-08-21 01:22:19 - INFO - vision_config is None, using default vision config +2025-08-21 01:22:26 - INFO - Tokens per second: 1.8954757210352398, Peak GPU memory MB: 11824.375 +2025-08-21 01:22:26 - INFO - [9738fd4f-d14a-4967-bdb7-b1c9156add2a] Inference time: 28.46 seconds, CPU usage: 50.5%, CPU core utilization: [45.6, 51.9, 34.0, 70.4] +2025-08-21 01:22:26 - INFO - [9738fd4f-d14a-4967-bdb7-b1c9156add2a] Cleaned up temporary frame directory: temp_videos/9738fd4f-d14a-4967-bdb7-b1c9156add2a +2025-08-21 01:22:26 - INFO - [e5c0b243-2f04-4afd-a044-8e574b65e7fe] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_046.mp4' +2025-08-21 01:22:26 - INFO - [e5c0b243-2f04-4afd-a044-8e574b65e7fe] Video saved to temporary file: temp_videos/e5c0b243-2f04-4afd-a044-8e574b65e7fe.mp4 +2025-08-21 01:22:26 - INFO - [e5c0b243-2f04-4afd-a044-8e574b65e7fe] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:22:31 - INFO - [e5c0b243-2f04-4afd-a044-8e574b65e7fe] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:22:31 - INFO - [e5c0b243-2f04-4afd-a044-8e574b65e7fe] 30 frames saved to temp_videos/e5c0b243-2f04-4afd-a044-8e574b65e7fe +2025-08-21 01:22:44 - INFO - vision_config is None, using default vision config +2025-08-21 01:22:56 - INFO - Tokens per second: 5.655760454979962, Peak GPU memory MB: 11824.375 +2025-08-21 01:22:56 - INFO - [e5c0b243-2f04-4afd-a044-8e574b65e7fe] Inference time: 29.07 seconds, CPU usage: 36.7%, CPU core utilization: [19.4, 41.5, 59.0, 27.0] +2025-08-21 01:22:56 - INFO - [e5c0b243-2f04-4afd-a044-8e574b65e7fe] Cleaned up temporary frame directory: temp_videos/e5c0b243-2f04-4afd-a044-8e574b65e7fe +2025-08-21 01:22:56 - INFO - [2c19cb12-ab5a-476b-8773-34b5991b8716] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_047.mp4' +2025-08-21 01:22:56 - INFO - [2c19cb12-ab5a-476b-8773-34b5991b8716] Video saved to temporary file: temp_videos/2c19cb12-ab5a-476b-8773-34b5991b8716.mp4 +2025-08-21 01:22:56 - INFO - [2c19cb12-ab5a-476b-8773-34b5991b8716] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:23:00 - INFO - [2c19cb12-ab5a-476b-8773-34b5991b8716] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:23:00 - INFO - [2c19cb12-ab5a-476b-8773-34b5991b8716] 30 frames saved to temp_videos/2c19cb12-ab5a-476b-8773-34b5991b8716 +2025-08-21 01:23:13 - INFO - vision_config is None, using default vision config +2025-08-21 01:23:23 - INFO - Tokens per second: 3.9947178064700855, Peak GPU memory MB: 11824.375 +2025-08-21 01:23:23 - INFO - [2c19cb12-ab5a-476b-8773-34b5991b8716] Inference time: 27.28 seconds, CPU usage: 37.9%, CPU core utilization: [27.5, 21.8, 42.6, 59.5] +2025-08-21 01:23:23 - INFO - [2c19cb12-ab5a-476b-8773-34b5991b8716] Cleaned up temporary frame directory: temp_videos/2c19cb12-ab5a-476b-8773-34b5991b8716 +2025-08-21 01:23:23 - INFO - [1f521869-b6a6-4ffc-b7f9-3ba6424abdc9] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_048.mp4' +2025-08-21 01:23:23 - INFO - [1f521869-b6a6-4ffc-b7f9-3ba6424abdc9] Video saved to temporary file: temp_videos/1f521869-b6a6-4ffc-b7f9-3ba6424abdc9.mp4 +2025-08-21 01:23:23 - INFO - [1f521869-b6a6-4ffc-b7f9-3ba6424abdc9] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:23:28 - INFO - [1f521869-b6a6-4ffc-b7f9-3ba6424abdc9] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:23:28 - INFO - [1f521869-b6a6-4ffc-b7f9-3ba6424abdc9] 30 frames saved to temp_videos/1f521869-b6a6-4ffc-b7f9-3ba6424abdc9 +2025-08-21 01:23:41 - INFO - vision_config is None, using default vision config +2025-08-21 01:23:52 - INFO - Tokens per second: 5.431023554612727, Peak GPU memory MB: 11824.375 +2025-08-21 01:23:52 - INFO - [1f521869-b6a6-4ffc-b7f9-3ba6424abdc9] Inference time: 28.81 seconds, CPU usage: 36.9%, CPU core utilization: [35.2, 33.7, 57.7, 21.0] +2025-08-21 01:23:52 - INFO - [1f521869-b6a6-4ffc-b7f9-3ba6424abdc9] Cleaned up temporary frame directory: temp_videos/1f521869-b6a6-4ffc-b7f9-3ba6424abdc9 +2025-08-21 01:23:52 - INFO - [bb6768fb-6e99-4507-b665-27c2c1ae0b50] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_049.mp4' +2025-08-21 01:23:52 - INFO - [bb6768fb-6e99-4507-b665-27c2c1ae0b50] Video saved to temporary file: temp_videos/bb6768fb-6e99-4507-b665-27c2c1ae0b50.mp4 +2025-08-21 01:23:52 - INFO - [bb6768fb-6e99-4507-b665-27c2c1ae0b50] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:23:57 - INFO - [bb6768fb-6e99-4507-b665-27c2c1ae0b50] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:23:57 - INFO - [bb6768fb-6e99-4507-b665-27c2c1ae0b50] 30 frames saved to temp_videos/bb6768fb-6e99-4507-b665-27c2c1ae0b50 +2025-08-21 01:24:09 - INFO - vision_config is None, using default vision config +2025-08-21 01:24:20 - INFO - Tokens per second: 5.315589822361752, Peak GPU memory MB: 11824.375 +2025-08-21 01:24:20 - INFO - [bb6768fb-6e99-4507-b665-27c2c1ae0b50] Inference time: 28.63 seconds, CPU usage: 44.5%, CPU core utilization: [27.1, 55.7, 35.4, 59.8] +2025-08-21 01:24:20 - INFO - [bb6768fb-6e99-4507-b665-27c2c1ae0b50] Cleaned up temporary frame directory: temp_videos/bb6768fb-6e99-4507-b665-27c2c1ae0b50 +2025-08-21 01:24:20 - INFO - [7c0a1e36-be63-4b18-9096-cf0faa8f5ca7] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_050.mp4' +2025-08-21 01:24:20 - INFO - [7c0a1e36-be63-4b18-9096-cf0faa8f5ca7] Video saved to temporary file: temp_videos/7c0a1e36-be63-4b18-9096-cf0faa8f5ca7.mp4 +2025-08-21 01:24:20 - INFO - [7c0a1e36-be63-4b18-9096-cf0faa8f5ca7] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:24:26 - INFO - [7c0a1e36-be63-4b18-9096-cf0faa8f5ca7] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:24:26 - INFO - [7c0a1e36-be63-4b18-9096-cf0faa8f5ca7] 30 frames saved to temp_videos/7c0a1e36-be63-4b18-9096-cf0faa8f5ca7 +2025-08-21 01:24:39 - INFO - vision_config is None, using default vision config +2025-08-21 01:24:48 - INFO - Tokens per second: 4.2201806034921, Peak GPU memory MB: 11824.375 +2025-08-21 01:24:48 - INFO - [7c0a1e36-be63-4b18-9096-cf0faa8f5ca7] Inference time: 28.00 seconds, CPU usage: 37.8%, CPU core utilization: [45.9, 36.6, 32.4, 36.4] +2025-08-21 01:24:48 - INFO - [7c0a1e36-be63-4b18-9096-cf0faa8f5ca7] Cleaned up temporary frame directory: temp_videos/7c0a1e36-be63-4b18-9096-cf0faa8f5ca7 +2025-08-21 01:24:48 - INFO - [3a2ca98e-47b8-45aa-8834-9dc0e398936a] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_051.mp4' +2025-08-21 01:24:48 - INFO - [3a2ca98e-47b8-45aa-8834-9dc0e398936a] Video saved to temporary file: temp_videos/3a2ca98e-47b8-45aa-8834-9dc0e398936a.mp4 +2025-08-21 01:24:48 - INFO - [3a2ca98e-47b8-45aa-8834-9dc0e398936a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:24:53 - INFO - [3a2ca98e-47b8-45aa-8834-9dc0e398936a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:24:53 - INFO - [3a2ca98e-47b8-45aa-8834-9dc0e398936a] 30 frames saved to temp_videos/3a2ca98e-47b8-45aa-8834-9dc0e398936a +2025-08-21 01:25:06 - INFO - vision_config is None, using default vision config +2025-08-21 01:25:16 - INFO - Tokens per second: 4.498009394398241, Peak GPU memory MB: 11824.375 +2025-08-21 01:25:16 - INFO - [3a2ca98e-47b8-45aa-8834-9dc0e398936a] Inference time: 27.74 seconds, CPU usage: 37.8%, CPU core utilization: [34.1, 28.6, 47.5, 40.9] +2025-08-21 01:25:16 - INFO - [3a2ca98e-47b8-45aa-8834-9dc0e398936a] Cleaned up temporary frame directory: temp_videos/3a2ca98e-47b8-45aa-8834-9dc0e398936a +2025-08-21 01:25:16 - INFO - [acb71d01-022e-4d9d-a98c-903f36965977] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_052.mp4' +2025-08-21 01:25:16 - INFO - [acb71d01-022e-4d9d-a98c-903f36965977] Video saved to temporary file: temp_videos/acb71d01-022e-4d9d-a98c-903f36965977.mp4 +2025-08-21 01:25:16 - INFO - [acb71d01-022e-4d9d-a98c-903f36965977] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:25:22 - INFO - [acb71d01-022e-4d9d-a98c-903f36965977] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:25:22 - INFO - [acb71d01-022e-4d9d-a98c-903f36965977] 30 frames saved to temp_videos/acb71d01-022e-4d9d-a98c-903f36965977 +2025-08-21 01:25:35 - INFO - vision_config is None, using default vision config +2025-08-21 01:25:47 - INFO - Tokens per second: 6.6824774787667796, Peak GPU memory MB: 11824.375 +2025-08-21 01:25:47 - INFO - [acb71d01-022e-4d9d-a98c-903f36965977] Inference time: 31.31 seconds, CPU usage: 44.4%, CPU core utilization: [35.8, 44.5, 56.2, 41.3] +2025-08-21 01:25:47 - INFO - [acb71d01-022e-4d9d-a98c-903f36965977] Cleaned up temporary frame directory: temp_videos/acb71d01-022e-4d9d-a98c-903f36965977 diff --git a/API_Transformers/logs/MiniCPM-V-4/20250821_033846.log b/API_Transformers/logs/MiniCPM-V-4/20250821_033846.log new file mode 100644 index 0000000000000000000000000000000000000000..504e7691eccab42fb751e3b4392207c48fbbb589 --- /dev/null +++ b/API_Transformers/logs/MiniCPM-V-4/20250821_033846.log @@ -0,0 +1,157 @@ +2025-08-21 03:38:46 - INFO - Loading model: openbmb/MiniCPM-V-4 +2025-08-21 03:38:46 - INFO - vision_config is None, using default vision config +2025-08-21 03:39:50 - INFO - Model loaded in 64.62 seconds +2025-08-21 03:39:50 - INFO - GPU Memory Usage after model load: 7802.99 MB +2025-08-21 03:39:57 - INFO - [e29d31c5-9a6b-48cd-ac25-7affc04fc186] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. 
Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_001.mp4' +2025-08-21 03:39:57 - INFO - [e29d31c5-9a6b-48cd-ac25-7affc04fc186] Video saved to temporary file: temp_videos/e29d31c5-9a6b-48cd-ac25-7affc04fc186.mp4 +2025-08-21 03:39:57 - INFO - [e29d31c5-9a6b-48cd-ac25-7affc04fc186] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:40:01 - INFO - [e29d31c5-9a6b-48cd-ac25-7affc04fc186] Extracted 30 frames successfully. Saving to temporary files... +2025-08-21 03:40:01 - INFO - [e29d31c5-9a6b-48cd-ac25-7affc04fc186] 30 frames saved to temp_videos/e29d31c5-9a6b-48cd-ac25-7affc04fc186 +2025-08-21 03:40:17 - INFO - vision_config is None, using default vision config +2025-08-21 03:40:35 - INFO - Tokens per second: 8.46238691458392, Peak GPU memory MB: 11824.375 +2025-08-21 03:40:35 - INFO - [e29d31c5-9a6b-48cd-ac25-7affc04fc186] Inference time: 37.25 seconds, CPU usage: 28.4%, CPU core utilization: [24.9, 35.0, 21.8, 31.7] +2025-08-21 03:40:35 - INFO - [e29d31c5-9a6b-48cd-ac25-7affc04fc186] Cleaned up temporary frame directory: temp_videos/e29d31c5-9a6b-48cd-ac25-7affc04fc186 +2025-08-21 03:40:35 - INFO - [0ed8d6d1-aea6-4701-a3ed-2d877bfc9882] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_001.mp4' +2025-08-21 03:40:35 - INFO - [0ed8d6d1-aea6-4701-a3ed-2d877bfc9882] Video saved to temporary file: temp_videos/0ed8d6d1-aea6-4701-a3ed-2d877bfc9882.mp4 +2025-08-21 03:40:35 - INFO - [0ed8d6d1-aea6-4701-a3ed-2d877bfc9882] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:40:38 - INFO - [0ed8d6d1-aea6-4701-a3ed-2d877bfc9882] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 03:40:38 - INFO - [0ed8d6d1-aea6-4701-a3ed-2d877bfc9882] 30 frames saved to temp_videos/0ed8d6d1-aea6-4701-a3ed-2d877bfc9882 +2025-08-21 03:40:51 - INFO - vision_config is None, using default vision config +2025-08-21 03:41:14 - INFO - Tokens per second: 10.024019230028593, Peak GPU memory MB: 11824.375 +2025-08-21 03:41:14 - INFO - [0ed8d6d1-aea6-4701-a3ed-2d877bfc9882] Inference time: 39.58 seconds, CPU usage: 31.8%, CPU core utilization: [16.9, 19.2, 59.5, 31.5] +2025-08-21 03:41:14 - INFO - [0ed8d6d1-aea6-4701-a3ed-2d877bfc9882] Cleaned up temporary frame directory: temp_videos/0ed8d6d1-aea6-4701-a3ed-2d877bfc9882 +2025-08-21 03:41:14 - INFO - [1daa28b5-5708-4bd7-b738-7900bee17284] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_002.mp4' +2025-08-21 03:41:14 - INFO - [1daa28b5-5708-4bd7-b738-7900bee17284] Video saved to temporary file: temp_videos/1daa28b5-5708-4bd7-b738-7900bee17284.mp4 +2025-08-21 03:41:14 - INFO - [1daa28b5-5708-4bd7-b738-7900bee17284] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:41:18 - INFO - [1daa28b5-5708-4bd7-b738-7900bee17284] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 03:41:18 - INFO - [1daa28b5-5708-4bd7-b738-7900bee17284] 30 frames saved to temp_videos/1daa28b5-5708-4bd7-b738-7900bee17284 +2025-08-21 03:41:30 - INFO - vision_config is None, using default vision config +2025-08-21 03:41:42 - INFO - Tokens per second: 6.118521643289556, Peak GPU memory MB: 11824.375 +2025-08-21 03:41:42 - INFO - [1daa28b5-5708-4bd7-b738-7900bee17284] Inference time: 28.18 seconds, CPU usage: 33.2%, CPU core utilization: [57.3, 19.5, 10.5, 45.5] +2025-08-21 03:41:42 - INFO - [1daa28b5-5708-4bd7-b738-7900bee17284] Cleaned up temporary frame directory: temp_videos/1daa28b5-5708-4bd7-b738-7900bee17284 +2025-08-21 03:41:42 - INFO - [c70dd357-164c-4d57-b24d-8ead295ef24e] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_003.mp4' +2025-08-21 03:41:42 - INFO - [c70dd357-164c-4d57-b24d-8ead295ef24e] Video saved to temporary file: temp_videos/c70dd357-164c-4d57-b24d-8ead295ef24e.mp4 +2025-08-21 03:41:42 - INFO - [c70dd357-164c-4d57-b24d-8ead295ef24e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:41:46 - INFO - [c70dd357-164c-4d57-b24d-8ead295ef24e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 03:41:46 - INFO - [c70dd357-164c-4d57-b24d-8ead295ef24e] 30 frames saved to temp_videos/c70dd357-164c-4d57-b24d-8ead295ef24e +2025-08-21 03:41:59 - INFO - vision_config is None, using default vision config +2025-08-21 03:42:13 - INFO - Tokens per second: 7.325785835893888, Peak GPU memory MB: 11824.375 +2025-08-21 03:42:13 - INFO - [c70dd357-164c-4d57-b24d-8ead295ef24e] Inference time: 30.34 seconds, CPU usage: 33.2%, CPU core utilization: [32.8, 46.1, 30.7, 23.2] +2025-08-21 03:42:13 - INFO - [c70dd357-164c-4d57-b24d-8ead295ef24e] Cleaned up temporary frame directory: temp_videos/c70dd357-164c-4d57-b24d-8ead295ef24e +2025-08-21 03:42:13 - INFO - [e2eff8d2-37db-4d25-9765-d46404130b2d] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_004.mp4' +2025-08-21 03:42:13 - INFO - [e2eff8d2-37db-4d25-9765-d46404130b2d] Video saved to temporary file: temp_videos/e2eff8d2-37db-4d25-9765-d46404130b2d.mp4 +2025-08-21 03:42:13 - INFO - [e2eff8d2-37db-4d25-9765-d46404130b2d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:42:16 - INFO - [e2eff8d2-37db-4d25-9765-d46404130b2d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 03:42:16 - INFO - [e2eff8d2-37db-4d25-9765-d46404130b2d] 30 frames saved to temp_videos/e2eff8d2-37db-4d25-9765-d46404130b2d +2025-08-21 03:42:29 - INFO - vision_config is None, using default vision config +2025-08-21 03:42:40 - INFO - Tokens per second: 5.483056762285139, Peak GPU memory MB: 11824.375 +2025-08-21 03:42:40 - INFO - [e2eff8d2-37db-4d25-9765-d46404130b2d] Inference time: 27.37 seconds, CPU usage: 33.6%, CPU core utilization: [62.6, 13.2, 42.9, 15.6] +2025-08-21 03:42:40 - INFO - [e2eff8d2-37db-4d25-9765-d46404130b2d] Cleaned up temporary frame directory: temp_videos/e2eff8d2-37db-4d25-9765-d46404130b2d +2025-08-21 03:42:40 - INFO - [374baf0b-09b4-47f6-bda7-007ed31b73e6] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_005.mp4' +2025-08-21 03:42:40 - INFO - [374baf0b-09b4-47f6-bda7-007ed31b73e6] Video saved to temporary file: temp_videos/374baf0b-09b4-47f6-bda7-007ed31b73e6.mp4 +2025-08-21 03:42:40 - INFO - [374baf0b-09b4-47f6-bda7-007ed31b73e6] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:42:43 - INFO - [374baf0b-09b4-47f6-bda7-007ed31b73e6] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 03:42:43 - INFO - [374baf0b-09b4-47f6-bda7-007ed31b73e6] 30 frames saved to temp_videos/374baf0b-09b4-47f6-bda7-007ed31b73e6 +2025-08-21 03:42:56 - INFO - vision_config is None, using default vision config +2025-08-21 03:43:12 - INFO - Tokens per second: 7.8524871607145865, Peak GPU memory MB: 11824.375 +2025-08-21 03:43:12 - INFO - [374baf0b-09b4-47f6-bda7-007ed31b73e6] Inference time: 31.57 seconds, CPU usage: 32.7%, CPU core utilization: [13.2, 42.4, 47.5, 27.5] +2025-08-21 03:43:12 - INFO - [374baf0b-09b4-47f6-bda7-007ed31b73e6] Cleaned up temporary frame directory: temp_videos/374baf0b-09b4-47f6-bda7-007ed31b73e6 +2025-08-21 03:43:12 - INFO - [b9266afc-5115-4696-91ea-9894092513ff] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_006.mp4' +2025-08-21 03:43:12 - INFO - [b9266afc-5115-4696-91ea-9894092513ff] Video saved to temporary file: temp_videos/b9266afc-5115-4696-91ea-9894092513ff.mp4 +2025-08-21 03:43:12 - INFO - [b9266afc-5115-4696-91ea-9894092513ff] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:43:15 - INFO - [b9266afc-5115-4696-91ea-9894092513ff] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 03:43:15 - INFO - [b9266afc-5115-4696-91ea-9894092513ff] 30 frames saved to temp_videos/b9266afc-5115-4696-91ea-9894092513ff +2025-08-21 03:43:28 - INFO - vision_config is None, using default vision config +2025-08-21 03:43:40 - INFO - Tokens per second: 5.751292635048318, Peak GPU memory MB: 11824.375 +2025-08-21 03:43:40 - INFO - [b9266afc-5115-4696-91ea-9894092513ff] Inference time: 27.84 seconds, CPU usage: 33.5%, CPU core utilization: [24.6, 32.6, 13.2, 63.8] +2025-08-21 03:43:40 - INFO - [b9266afc-5115-4696-91ea-9894092513ff] Cleaned up temporary frame directory: temp_videos/b9266afc-5115-4696-91ea-9894092513ff +2025-08-21 03:43:40 - INFO - [cf387e92-735c-444b-a102-345d888dc633] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_007.mp4' +2025-08-21 03:43:40 - INFO - [cf387e92-735c-444b-a102-345d888dc633] Video saved to temporary file: temp_videos/cf387e92-735c-444b-a102-345d888dc633.mp4 +2025-08-21 03:43:40 - INFO - [cf387e92-735c-444b-a102-345d888dc633] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:43:43 - INFO - [cf387e92-735c-444b-a102-345d888dc633] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 03:43:43 - INFO - [cf387e92-735c-444b-a102-345d888dc633] 30 frames saved to temp_videos/cf387e92-735c-444b-a102-345d888dc633 +2025-08-21 03:43:56 - INFO - vision_config is None, using default vision config +2025-08-21 03:44:08 - INFO - Tokens per second: 6.460640309211369, Peak GPU memory MB: 11824.375 +2025-08-21 03:44:08 - INFO - [cf387e92-735c-444b-a102-345d888dc633] Inference time: 28.88 seconds, CPU usage: 33.2%, CPU core utilization: [11.9, 52.0, 12.7, 56.4] +2025-08-21 03:44:08 - INFO - [cf387e92-735c-444b-a102-345d888dc633] Cleaned up temporary frame directory: temp_videos/cf387e92-735c-444b-a102-345d888dc633 +2025-08-21 03:44:08 - INFO - [aef21c10-5565-48f5-bcbf-e239a1faa322] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_008.mp4' +2025-08-21 03:44:08 - INFO - [aef21c10-5565-48f5-bcbf-e239a1faa322] Video saved to temporary file: temp_videos/aef21c10-5565-48f5-bcbf-e239a1faa322.mp4 +2025-08-21 03:44:08 - INFO - [aef21c10-5565-48f5-bcbf-e239a1faa322] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:44:12 - INFO - [aef21c10-5565-48f5-bcbf-e239a1faa322] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 03:44:12 - INFO - [aef21c10-5565-48f5-bcbf-e239a1faa322] 30 frames saved to temp_videos/aef21c10-5565-48f5-bcbf-e239a1faa322 +2025-08-21 03:44:25 - INFO - vision_config is None, using default vision config +2025-08-21 03:44:35 - INFO - Tokens per second: 4.950112497910254, Peak GPU memory MB: 11824.375 +2025-08-21 03:44:35 - INFO - [aef21c10-5565-48f5-bcbf-e239a1faa322] Inference time: 26.99 seconds, CPU usage: 34.1%, CPU core utilization: [16.3, 12.5, 47.8, 59.8] +2025-08-21 03:44:35 - INFO - [aef21c10-5565-48f5-bcbf-e239a1faa322] Cleaned up temporary frame directory: temp_videos/aef21c10-5565-48f5-bcbf-e239a1faa322 +2025-08-21 03:44:35 - INFO - [040650dd-914d-453f-a411-b31d1d6897d5] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_009.mp4' +2025-08-21 03:44:35 - INFO - [040650dd-914d-453f-a411-b31d1d6897d5] Video saved to temporary file: temp_videos/040650dd-914d-453f-a411-b31d1d6897d5.mp4 +2025-08-21 03:44:35 - INFO - [040650dd-914d-453f-a411-b31d1d6897d5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:44:39 - INFO - [040650dd-914d-453f-a411-b31d1d6897d5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 03:44:39 - INFO - [040650dd-914d-453f-a411-b31d1d6897d5] 30 frames saved to temp_videos/040650dd-914d-453f-a411-b31d1d6897d5 +2025-08-21 03:44:52 - INFO - vision_config is None, using default vision config +2025-08-21 03:45:04 - INFO - Tokens per second: 6.046726583056993, Peak GPU memory MB: 11824.375 +2025-08-21 03:45:04 - INFO - [040650dd-914d-453f-a411-b31d1d6897d5] Inference time: 28.31 seconds, CPU usage: 33.8%, CPU core utilization: [45.1, 36.7, 40.1, 13.1] +2025-08-21 03:45:04 - INFO - [040650dd-914d-453f-a411-b31d1d6897d5] Cleaned up temporary frame directory: temp_videos/040650dd-914d-453f-a411-b31d1d6897d5 +2025-08-21 03:45:04 - INFO - [c4922af4-0973-46aa-8ab3-a2904f616ca0] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_010.mp4' +2025-08-21 03:45:04 - INFO - [c4922af4-0973-46aa-8ab3-a2904f616ca0] Video saved to temporary file: temp_videos/c4922af4-0973-46aa-8ab3-a2904f616ca0.mp4 +2025-08-21 03:45:04 - INFO - [c4922af4-0973-46aa-8ab3-a2904f616ca0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:45:07 - INFO - [c4922af4-0973-46aa-8ab3-a2904f616ca0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 03:45:07 - INFO - [c4922af4-0973-46aa-8ab3-a2904f616ca0] 30 frames saved to temp_videos/c4922af4-0973-46aa-8ab3-a2904f616ca0 +2025-08-21 03:45:20 - INFO - vision_config is None, using default vision config +2025-08-21 03:45:31 - INFO - Tokens per second: 5.012952424490043, Peak GPU memory MB: 11824.375 +2025-08-21 03:45:31 - INFO - [c4922af4-0973-46aa-8ab3-a2904f616ca0] Inference time: 26.92 seconds, CPU usage: 33.9%, CPU core utilization: [45.7, 15.2, 24.5, 49.9] +2025-08-21 03:45:31 - INFO - [c4922af4-0973-46aa-8ab3-a2904f616ca0] Cleaned up temporary frame directory: temp_videos/c4922af4-0973-46aa-8ab3-a2904f616ca0 +2025-08-21 03:45:31 - INFO - [96d2962b-c166-4be7-847e-fe025954af18] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_011.mp4' +2025-08-21 03:45:31 - INFO - [96d2962b-c166-4be7-847e-fe025954af18] Video saved to temporary file: temp_videos/96d2962b-c166-4be7-847e-fe025954af18.mp4 +2025-08-21 03:45:31 - INFO - [96d2962b-c166-4be7-847e-fe025954af18] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:45:34 - INFO - [96d2962b-c166-4be7-847e-fe025954af18] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 03:45:34 - INFO - [96d2962b-c166-4be7-847e-fe025954af18] 30 frames saved to temp_videos/96d2962b-c166-4be7-847e-fe025954af18 +2025-08-21 03:45:47 - INFO - vision_config is None, using default vision config +2025-08-21 03:45:59 - INFO - Tokens per second: 6.0496181050699604, Peak GPU memory MB: 11824.375 +2025-08-21 03:45:59 - INFO - [96d2962b-c166-4be7-847e-fe025954af18] Inference time: 28.26 seconds, CPU usage: 33.7%, CPU core utilization: [15.3, 25.6, 46.3, 47.7] +2025-08-21 03:45:59 - INFO - [96d2962b-c166-4be7-847e-fe025954af18] Cleaned up temporary frame directory: temp_videos/96d2962b-c166-4be7-847e-fe025954af18 +2025-08-21 03:45:59 - INFO - [1861ded3-2706-4381-8e32-07949f940d95] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_012.mp4' +2025-08-21 03:45:59 - INFO - [1861ded3-2706-4381-8e32-07949f940d95] Video saved to temporary file: temp_videos/1861ded3-2706-4381-8e32-07949f940d95.mp4 +2025-08-21 03:45:59 - INFO - [1861ded3-2706-4381-8e32-07949f940d95] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:46:02 - INFO - [1861ded3-2706-4381-8e32-07949f940d95] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 03:46:02 - INFO - [1861ded3-2706-4381-8e32-07949f940d95] 30 frames saved to temp_videos/1861ded3-2706-4381-8e32-07949f940d95 +2025-08-21 03:46:15 - INFO - vision_config is None, using default vision config +2025-08-21 03:46:27 - INFO - Tokens per second: 6.042554615117601, Peak GPU memory MB: 11824.375 +2025-08-21 03:46:27 - INFO - [1861ded3-2706-4381-8e32-07949f940d95] Inference time: 28.37 seconds, CPU usage: 33.3%, CPU core utilization: [53.9, 22.6, 36.0, 20.6] +2025-08-21 03:46:27 - INFO - [1861ded3-2706-4381-8e32-07949f940d95] Cleaned up temporary frame directory: temp_videos/1861ded3-2706-4381-8e32-07949f940d95 +2025-08-21 03:46:27 - INFO - [04268664-f928-4c89-ab42-d07706c93257] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_013.mp4' +2025-08-21 03:46:27 - INFO - [04268664-f928-4c89-ab42-d07706c93257] Video saved to temporary file: temp_videos/04268664-f928-4c89-ab42-d07706c93257.mp4 +2025-08-21 03:46:27 - INFO - [04268664-f928-4c89-ab42-d07706c93257] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:46:31 - INFO - [04268664-f928-4c89-ab42-d07706c93257] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 03:46:31 - INFO - [04268664-f928-4c89-ab42-d07706c93257] 30 frames saved to temp_videos/04268664-f928-4c89-ab42-d07706c93257 +2025-08-21 03:46:44 - INFO - vision_config is None, using default vision config +2025-08-21 03:46:55 - INFO - Tokens per second: 5.897914933533588, Peak GPU memory MB: 11824.375 +2025-08-21 03:46:55 - INFO - [04268664-f928-4c89-ab42-d07706c93257] Inference time: 28.07 seconds, CPU usage: 33.2%, CPU core utilization: [52.2, 22.0, 22.6, 35.7] +2025-08-21 03:46:55 - INFO - [04268664-f928-4c89-ab42-d07706c93257] Cleaned up temporary frame directory: temp_videos/04268664-f928-4c89-ab42-d07706c93257 +2025-08-21 03:46:55 - INFO - [604fd124-b44b-4dfe-b7b4-8d3e1f179d69] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_014.mp4' +2025-08-21 03:46:55 - INFO - [604fd124-b44b-4dfe-b7b4-8d3e1f179d69] Video saved to temporary file: temp_videos/604fd124-b44b-4dfe-b7b4-8d3e1f179d69.mp4 +2025-08-21 03:46:55 - INFO - [604fd124-b44b-4dfe-b7b4-8d3e1f179d69] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:46:59 - INFO - [604fd124-b44b-4dfe-b7b4-8d3e1f179d69] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 03:46:59 - INFO - [604fd124-b44b-4dfe-b7b4-8d3e1f179d69] 30 frames saved to temp_videos/604fd124-b44b-4dfe-b7b4-8d3e1f179d69 +2025-08-21 03:47:12 - INFO - vision_config is None, using default vision config +2025-08-21 03:47:25 - INFO - Tokens per second: 6.542944804531987, Peak GPU memory MB: 11824.375 +2025-08-21 03:47:25 - INFO - [604fd124-b44b-4dfe-b7b4-8d3e1f179d69] Inference time: 29.09 seconds, CPU usage: 32.5%, CPU core utilization: [11.8, 19.4, 53.2, 45.3] +2025-08-21 03:47:25 - INFO - [604fd124-b44b-4dfe-b7b4-8d3e1f179d69] Cleaned up temporary frame directory: temp_videos/604fd124-b44b-4dfe-b7b4-8d3e1f179d69 +2025-08-21 03:47:25 - INFO - [02b38a1d-8ab7-43a6-a5e6-8a531ff2ce6d] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_015.mp4' +2025-08-21 03:47:25 - INFO - [02b38a1d-8ab7-43a6-a5e6-8a531ff2ce6d] Video saved to temporary file: temp_videos/02b38a1d-8ab7-43a6-a5e6-8a531ff2ce6d.mp4 +2025-08-21 03:47:25 - INFO - [02b38a1d-8ab7-43a6-a5e6-8a531ff2ce6d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:47:28 - INFO - [02b38a1d-8ab7-43a6-a5e6-8a531ff2ce6d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 03:47:28 - INFO - [02b38a1d-8ab7-43a6-a5e6-8a531ff2ce6d] 30 frames saved to temp_videos/02b38a1d-8ab7-43a6-a5e6-8a531ff2ce6d +2025-08-21 03:47:41 - INFO - vision_config is None, using default vision config +2025-08-21 03:47:52 - INFO - Tokens per second: 5.0694963028257245, Peak GPU memory MB: 11824.375 +2025-08-21 03:47:52 - INFO - [02b38a1d-8ab7-43a6-a5e6-8a531ff2ce6d] Inference time: 27.06 seconds, CPU usage: 33.0%, CPU core utilization: [18.7, 21.7, 41.3, 50.1] +2025-08-21 03:47:52 - INFO - [02b38a1d-8ab7-43a6-a5e6-8a531ff2ce6d] Cleaned up temporary frame directory: temp_videos/02b38a1d-8ab7-43a6-a5e6-8a531ff2ce6d +2025-08-21 03:47:52 - INFO - [0ac85bea-34eb-4a31-8263-f7810fb38235] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_016.mp4' +2025-08-21 03:47:52 - INFO - [0ac85bea-34eb-4a31-8263-f7810fb38235] Video saved to temporary file: temp_videos/0ac85bea-34eb-4a31-8263-f7810fb38235.mp4 +2025-08-21 03:47:52 - INFO - [0ac85bea-34eb-4a31-8263-f7810fb38235] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 03:47:55 - INFO - [0ac85bea-34eb-4a31-8263-f7810fb38235] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 03:47:55 - INFO - [0ac85bea-34eb-4a31-8263-f7810fb38235] 30 frames saved to temp_videos/0ac85bea-34eb-4a31-8263-f7810fb38235 +2025-08-21 03:48:08 - INFO - vision_config is None, using default vision config +2025-08-21 03:48:19 - INFO - Tokens per second: 5.245383520327553, Peak GPU memory MB: 11824.375 +2025-08-21 03:48:19 - INFO - [0ac85bea-34eb-4a31-8263-f7810fb38235] Inference time: 27.29 seconds, CPU usage: 33.6%, CPU core utilization: [53.0, 47.3, 12.8, 21.4] +2025-08-21 03:48:19 - INFO - [0ac85bea-34eb-4a31-8263-f7810fb38235] Cleaned up temporary frame directory: temp_videos/0ac85bea-34eb-4a31-8263-f7810fb38235 diff --git a/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_212712.log b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_212712.log new file mode 100644 index 0000000000000000000000000000000000000000..8731021fdf1abfce1d00db5a93563a061f2a3ac7 --- /dev/null +++ b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_212712.log @@ -0,0 +1 @@ +2025-08-18 21:27:12 - INFO - Loading model: Qwen2-VL-2B-Instruct-AWQ diff --git a/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_212744.log b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_212744.log new file mode 100644 index 0000000000000000000000000000000000000000..79b8b23cbc26f0d6a64b518ff022c5f14ebc8b9d --- /dev/null +++ b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_212744.log @@ -0,0 +1,20 @@ +2025-08-18 21:27:44 - INFO - Loading model: Qwen/Qwen2-VL-2B-Instruct-AWQ +2025-08-18 21:27:46 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-18 21:27:54 - INFO - Model loaded in 9.42 seconds +2025-08-18 21:27:54 - INFO - GPU Memory Usage after model load: 2.31 GB +2025-08-18 21:29:38 - INFO - [f4055b77-a7b2-4f36-8134-d0a30d7f57b0] Received new video inference request. 
Prompt: '视频里发生了什么?', Video: 'messi_part_022.mp4' +2025-08-18 21:29:38 - INFO - [f4055b77-a7b2-4f36-8134-d0a30d7f57b0] Video saved to temporary file: temp_videos/f4055b77-a7b2-4f36-8134-d0a30d7f57b0.mp4 +2025-08-18 21:29:38 - INFO - [f4055b77-a7b2-4f36-8134-d0a30d7f57b0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 21:29:47 - INFO - [f4055b77-a7b2-4f36-8134-d0a30d7f57b0] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 21:29:47 - INFO - [f4055b77-a7b2-4f36-8134-d0a30d7f57b0] 30 frames saved to temp_videos/f4055b77-a7b2-4f36-8134-d0a30d7f57b0 +2025-08-18 21:29:49 - ERROR - [f4055b77-a7b2-4f36-8134-d0a30d7f57b0] An error occurred during processing: name 'processor' is not defined +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/local_deploy/infer.py", line 105, in video_inference + output_text = model.generate(frame_paths, prompt) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/local_deploy/models/qwen.py", line 44, in generate + streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True) + ^^^^^^^^^ +NameError: name 'processor' is not defined +2025-08-18 21:29:49 - INFO - [f4055b77-a7b2-4f36-8134-d0a30d7f57b0] Cleaned up temporary file: temp_videos/f4055b77-a7b2-4f36-8134-d0a30d7f57b0.mp4 +2025-08-18 21:29:49 - INFO - [f4055b77-a7b2-4f36-8134-d0a30d7f57b0] Cleaned up temporary frame directory: temp_videos/f4055b77-a7b2-4f36-8134-d0a30d7f57b0 diff --git a/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_213116.log b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_213116.log new file mode 100644 index 0000000000000000000000000000000000000000..e53e10b061b1d83b6309e4f1419e6836cdad9188 --- /dev/null +++ b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_213116.log @@ -0,0 +1,9 @@ +2025-08-18 21:31:16 - INFO - Loading model: Qwen/Qwen2-VL-2B-Instruct-AWQ +2025-08-18 21:31:19 - INFO - We will use 90% of the memory on device 0 for 
storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-18 21:31:26 - INFO - Model loaded in 10.07 seconds +2025-08-18 21:31:26 - INFO - GPU Memory Usage after model load: 2.31 GB +2025-08-18 21:31:32 - INFO - [4b186b65-1c0b-488e-b92f-aa90f72b6b89] Received new video inference request. Prompt: '视频里发生了什么?', Video: 'messi_part_022.mp4' +2025-08-18 21:31:32 - INFO - [4b186b65-1c0b-488e-b92f-aa90f72b6b89] Video saved to temporary file: temp_videos/4b186b65-1c0b-488e-b92f-aa90f72b6b89.mp4 +2025-08-18 21:31:32 - INFO - [4b186b65-1c0b-488e-b92f-aa90f72b6b89] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 21:31:38 - INFO - [4b186b65-1c0b-488e-b92f-aa90f72b6b89] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 21:31:38 - INFO - [4b186b65-1c0b-488e-b92f-aa90f72b6b89] 30 frames saved to temp_videos/4b186b65-1c0b-488e-b92f-aa90f72b6b89 diff --git a/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_214203.log b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_214203.log new file mode 100644 index 0000000000000000000000000000000000000000..1bcc0335363b5cdf58d8f6b3db82cf0f8b2d9f9c --- /dev/null +++ b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_214203.log @@ -0,0 +1,20 @@ +2025-08-18 21:42:03 - INFO - Loading model: Qwen/Qwen2-VL-2B-Instruct-AWQ +2025-08-18 21:42:05 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-18 21:42:12 - INFO - Model loaded in 8.75 seconds +2025-08-18 21:42:12 - INFO - GPU Memory Usage after model load: 2.31 GB +2025-08-18 21:42:48 - INFO - [a8230cea-9aea-49ae-9a22-ef739e460b3a] Received new video inference request. 
Prompt: '视频里发生了什么?', Video: 'messi_part_022.mp4' +2025-08-18 21:42:48 - INFO - [a8230cea-9aea-49ae-9a22-ef739e460b3a] Video saved to temporary file: temp_videos/a8230cea-9aea-49ae-9a22-ef739e460b3a.mp4 +2025-08-18 21:42:48 - INFO - [a8230cea-9aea-49ae-9a22-ef739e460b3a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 21:42:54 - INFO - [a8230cea-9aea-49ae-9a22-ef739e460b3a] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 21:42:54 - INFO - [a8230cea-9aea-49ae-9a22-ef739e460b3a] 30 frames saved to temp_videos/a8230cea-9aea-49ae-9a22-ef739e460b3a +2025-08-18 21:42:54 - INFO - Prompt token length: 2276 +2025-08-18 21:43:14 - INFO - [a8230cea-9aea-49ae-9a22-ef739e460b3a] Cleaned up temporary file: temp_videos/a8230cea-9aea-49ae-9a22-ef739e460b3a.mp4 +2025-08-18 21:43:14 - INFO - [a8230cea-9aea-49ae-9a22-ef739e460b3a] Cleaned up temporary frame directory: temp_videos/a8230cea-9aea-49ae-9a22-ef739e460b3a +2025-08-18 21:43:43 - INFO - [24e5c5d0-b5ee-4bb0-b431-ebf1b58533ac] Received new video inference request. Prompt: 'Please describe the video in detail.', Video: 'messi_part_022.mp4' +2025-08-18 21:43:43 - INFO - [24e5c5d0-b5ee-4bb0-b431-ebf1b58533ac] Video saved to temporary file: temp_videos/24e5c5d0-b5ee-4bb0-b431-ebf1b58533ac.mp4 +2025-08-18 21:43:43 - INFO - [24e5c5d0-b5ee-4bb0-b431-ebf1b58533ac] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 21:43:48 - INFO - [24e5c5d0-b5ee-4bb0-b431-ebf1b58533ac] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 21:43:48 - INFO - [24e5c5d0-b5ee-4bb0-b431-ebf1b58533ac] 30 frames saved to temp_videos/24e5c5d0-b5ee-4bb0-b431-ebf1b58533ac +2025-08-18 21:43:49 - INFO - Prompt token length: 2278 +2025-08-18 21:43:59 - INFO - [24e5c5d0-b5ee-4bb0-b431-ebf1b58533ac] Cleaned up temporary file: temp_videos/24e5c5d0-b5ee-4bb0-b431-ebf1b58533ac.mp4 +2025-08-18 21:43:59 - INFO - [24e5c5d0-b5ee-4bb0-b431-ebf1b58533ac] Cleaned up temporary frame directory: temp_videos/24e5c5d0-b5ee-4bb0-b431-ebf1b58533ac diff --git a/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_215326.log b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_215326.log new file mode 100644 index 0000000000000000000000000000000000000000..7a4895a65052246eb2eeda2a1ced00ca9eda29d1 --- /dev/null +++ b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_215326.log @@ -0,0 +1,44 @@ +2025-08-18 21:53:26 - INFO - Loading model: Qwen/Qwen2-VL-2B-Instruct-AWQ +2025-08-18 21:53:28 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-18 21:53:35 - INFO - Model loaded in 8.77 seconds +2025-08-18 21:53:35 - INFO - GPU Memory Usage after model load: 2.31 GB +2025-08-18 21:53:50 - INFO - [c0e9cdfb-ee82-4829-855a-2e1b42058c09] Received new video inference request. Prompt: 'Please describe the video in detail.', Video: 'messi_part_001.mp4' +2025-08-18 21:53:50 - INFO - [c0e9cdfb-ee82-4829-855a-2e1b42058c09] Video saved to temporary file: temp_videos/c0e9cdfb-ee82-4829-855a-2e1b42058c09.mp4 +2025-08-18 21:53:50 - INFO - [c0e9cdfb-ee82-4829-855a-2e1b42058c09] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 21:53:52 - INFO - [c0e9cdfb-ee82-4829-855a-2e1b42058c09] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 21:53:52 - INFO - [c0e9cdfb-ee82-4829-855a-2e1b42058c09] 30 frames saved to temp_videos/c0e9cdfb-ee82-4829-855a-2e1b42058c09 +2025-08-18 21:53:53 - INFO - Prompt token length: 2278 +2025-08-18 21:54:13 - INFO - [c0e9cdfb-ee82-4829-855a-2e1b42058c09] Cleaned up temporary file: temp_videos/c0e9cdfb-ee82-4829-855a-2e1b42058c09.mp4 +2025-08-18 21:54:13 - INFO - [c0e9cdfb-ee82-4829-855a-2e1b42058c09] Cleaned up temporary frame directory: temp_videos/c0e9cdfb-ee82-4829-855a-2e1b42058c09 +2025-08-18 21:55:01 - INFO - [62a196af-36df-460b-8259-6c55c7dd812b] Received new video inference request. Prompt: 'Please describe the video in detail.', Video: 'messi_part_001.mp4' +2025-08-18 21:55:01 - INFO - [62a196af-36df-460b-8259-6c55c7dd812b] Video saved to temporary file: temp_videos/62a196af-36df-460b-8259-6c55c7dd812b.mp4 +2025-08-18 21:55:01 - INFO - [62a196af-36df-460b-8259-6c55c7dd812b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 21:55:03 - INFO - [62a196af-36df-460b-8259-6c55c7dd812b] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 21:55:03 - INFO - [62a196af-36df-460b-8259-6c55c7dd812b] 30 frames saved to temp_videos/62a196af-36df-460b-8259-6c55c7dd812b +2025-08-18 21:55:04 - INFO - Prompt token length: 2278 +2025-08-18 21:55:23 - INFO - [62a196af-36df-460b-8259-6c55c7dd812b] Cleaned up temporary file: temp_videos/62a196af-36df-460b-8259-6c55c7dd812b.mp4 +2025-08-18 21:55:23 - INFO - [62a196af-36df-460b-8259-6c55c7dd812b] Cleaned up temporary frame directory: temp_videos/62a196af-36df-460b-8259-6c55c7dd812b +2025-08-18 21:58:51 - INFO - [91718f5c-a793-425c-80ca-f228057def8f] Received new video inference request. 
Prompt: 'Please describe the video in detail.', Video: 'messi_part_001.mp4' +2025-08-18 21:58:51 - INFO - [91718f5c-a793-425c-80ca-f228057def8f] Video saved to temporary file: temp_videos/91718f5c-a793-425c-80ca-f228057def8f.mp4 +2025-08-18 21:58:51 - INFO - [91718f5c-a793-425c-80ca-f228057def8f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 21:58:53 - INFO - [91718f5c-a793-425c-80ca-f228057def8f] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 21:58:53 - INFO - [91718f5c-a793-425c-80ca-f228057def8f] 30 frames saved to temp_videos/91718f5c-a793-425c-80ca-f228057def8f +2025-08-18 21:58:54 - INFO - Prompt token length: 2278 +2025-08-18 21:59:13 - INFO - [91718f5c-a793-425c-80ca-f228057def8f] Cleaned up temporary file: temp_videos/91718f5c-a793-425c-80ca-f228057def8f.mp4 +2025-08-18 21:59:13 - INFO - [91718f5c-a793-425c-80ca-f228057def8f] Cleaned up temporary frame directory: temp_videos/91718f5c-a793-425c-80ca-f228057def8f +2025-08-18 22:00:03 - INFO - [b642c617-2577-4079-8f47-9a0f03b8f46a] Received new video inference request. Prompt: 'Please describe the video in detail.', Video: 'messi_part_001.mp4' +2025-08-18 22:00:03 - INFO - [b642c617-2577-4079-8f47-9a0f03b8f46a] Video saved to temporary file: temp_videos/b642c617-2577-4079-8f47-9a0f03b8f46a.mp4 +2025-08-18 22:00:03 - INFO - [b642c617-2577-4079-8f47-9a0f03b8f46a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:00:06 - INFO - [b642c617-2577-4079-8f47-9a0f03b8f46a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:00:06 - INFO - [b642c617-2577-4079-8f47-9a0f03b8f46a] 30 frames saved to temp_videos/b642c617-2577-4079-8f47-9a0f03b8f46a +2025-08-18 22:00:06 - INFO - Prompt token length: 2278 +2025-08-18 22:00:25 - INFO - [b642c617-2577-4079-8f47-9a0f03b8f46a] Cleaned up temporary file: temp_videos/b642c617-2577-4079-8f47-9a0f03b8f46a.mp4 +2025-08-18 22:00:25 - INFO - [b642c617-2577-4079-8f47-9a0f03b8f46a] Cleaned up temporary frame directory: temp_videos/b642c617-2577-4079-8f47-9a0f03b8f46a +2025-08-18 22:01:37 - INFO - [f91a5b3b-cd53-4ca6-9c5d-dd0b9432dba8] Received new video inference request. Prompt: 'Please describe the video in detail.', Video: 'messi_part_001.mp4' +2025-08-18 22:01:37 - INFO - [f91a5b3b-cd53-4ca6-9c5d-dd0b9432dba8] Video saved to temporary file: temp_videos/f91a5b3b-cd53-4ca6-9c5d-dd0b9432dba8.mp4 +2025-08-18 22:01:37 - INFO - [f91a5b3b-cd53-4ca6-9c5d-dd0b9432dba8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:01:41 - INFO - [f91a5b3b-cd53-4ca6-9c5d-dd0b9432dba8] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:01:41 - INFO - [f91a5b3b-cd53-4ca6-9c5d-dd0b9432dba8] 30 frames saved to temp_videos/f91a5b3b-cd53-4ca6-9c5d-dd0b9432dba8 +2025-08-18 22:01:42 - INFO - Prompt token length: 2278 +2025-08-18 22:01:51 - INFO - [f91a5b3b-cd53-4ca6-9c5d-dd0b9432dba8] Cleaned up temporary file: temp_videos/f91a5b3b-cd53-4ca6-9c5d-dd0b9432dba8.mp4 +2025-08-18 22:01:51 - INFO - [f91a5b3b-cd53-4ca6-9c5d-dd0b9432dba8] Cleaned up temporary frame directory: temp_videos/f91a5b3b-cd53-4ca6-9c5d-dd0b9432dba8 diff --git a/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_221356.log b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_221356.log new file mode 100644 index 0000000000000000000000000000000000000000..40dc7ffe39e09f1424b8528dc7b185c267bd49a7 --- /dev/null +++ b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_221356.log @@ -0,0 +1,22 @@ +2025-08-18 22:13:56 - INFO - Loading model: Qwen/Qwen2-VL-2B-Instruct-AWQ +2025-08-18 22:13:58 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-18 22:14:05 - INFO - Model loaded in 8.85 seconds +2025-08-18 22:14:05 - INFO - GPU Memory Usage after model load: 2.31 GB +2025-08-18 22:14:11 - INFO - [723f6887-db9b-43c9-846f-fc1b2cb67237] Received new video inference request. Prompt: 'Please describe the video in detail.', Video: 'messi_part_001.mp4' +2025-08-18 22:14:11 - INFO - [723f6887-db9b-43c9-846f-fc1b2cb67237] Video saved to temporary file: temp_videos/723f6887-db9b-43c9-846f-fc1b2cb67237.mp4 +2025-08-18 22:14:11 - INFO - [723f6887-db9b-43c9-846f-fc1b2cb67237] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:14:13 - INFO - [723f6887-db9b-43c9-846f-fc1b2cb67237] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:14:13 - INFO - [723f6887-db9b-43c9-846f-fc1b2cb67237] 30 frames saved to temp_videos/723f6887-db9b-43c9-846f-fc1b2cb67237 +2025-08-18 22:14:14 - INFO - Prompt token length: 2278 +2025-08-18 22:14:34 - INFO - Tokens per second: 12.449120956672985, Avg GPU memory MB: 3164.236328125 +2025-08-18 22:14:34 - INFO - [723f6887-db9b-43c9-846f-fc1b2cb67237] Cleaned up temporary file: temp_videos/723f6887-db9b-43c9-846f-fc1b2cb67237.mp4 +2025-08-18 22:14:34 - INFO - [723f6887-db9b-43c9-846f-fc1b2cb67237] Cleaned up temporary frame directory: temp_videos/723f6887-db9b-43c9-846f-fc1b2cb67237 +2025-08-18 22:15:33 - INFO - [3a6ec3f7-fcb1-4c31-ab9f-9aed3009645e] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 22:15:33 - INFO - [3a6ec3f7-fcb1-4c31-ab9f-9aed3009645e] Video saved to temporary file: temp_videos/3a6ec3f7-fcb1-4c31-ab9f-9aed3009645e.mp4 +2025-08-18 22:15:33 - INFO - [3a6ec3f7-fcb1-4c31-ab9f-9aed3009645e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:15:35 - INFO - [3a6ec3f7-fcb1-4c31-ab9f-9aed3009645e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:15:35 - INFO - [3a6ec3f7-fcb1-4c31-ab9f-9aed3009645e] 30 frames saved to temp_videos/3a6ec3f7-fcb1-4c31-ab9f-9aed3009645e +2025-08-18 22:15:36 - INFO - Prompt token length: 2276 +2025-08-18 22:15:44 - INFO - Tokens per second: 11.375589334843664, Avg GPU memory MB: 3163.87451171875 +2025-08-18 22:15:44 - INFO - [3a6ec3f7-fcb1-4c31-ab9f-9aed3009645e] Cleaned up temporary file: temp_videos/3a6ec3f7-fcb1-4c31-ab9f-9aed3009645e.mp4 +2025-08-18 22:15:44 - INFO - [3a6ec3f7-fcb1-4c31-ab9f-9aed3009645e] Cleaned up temporary frame directory: temp_videos/3a6ec3f7-fcb1-4c31-ab9f-9aed3009645e diff --git a/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_221804.log b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_221804.log new file mode 100644 index 0000000000000000000000000000000000000000..d29a14b8e647734188c28fb1d9b5c9041633c221 --- /dev/null +++ b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_221804.log @@ -0,0 +1,19 @@ +2025-08-18 22:18:04 - INFO - Loading model: Qwen/Qwen2-VL-2B-Instruct-AWQ +2025-08-18 22:18:07 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-18 22:18:15 - INFO - Model loaded in 10.56 seconds +2025-08-18 22:18:15 - INFO - GPU Memory Usage after model load: 2.31 GB +2025-08-18 22:18:24 - INFO - [f7d858d3-2aa9-419b-8876-9fa6c707b362] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 22:18:24 - INFO - [f7d858d3-2aa9-419b-8876-9fa6c707b362] Video saved to temporary file: temp_videos/f7d858d3-2aa9-419b-8876-9fa6c707b362.mp4 +2025-08-18 22:18:24 - INFO - [f7d858d3-2aa9-419b-8876-9fa6c707b362] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:18:28 - INFO - [f7d858d3-2aa9-419b-8876-9fa6c707b362] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:18:28 - INFO - [f7d858d3-2aa9-419b-8876-9fa6c707b362] 30 frames saved to temp_videos/f7d858d3-2aa9-419b-8876-9fa6c707b362 +2025-08-18 22:18:29 - INFO - Prompt token length: 2276 +2025-08-18 22:18:39 - INFO - Tokens per second: 9.237412977708425, Avg GPU memory MB: 3164.08056640625 +2025-08-18 22:18:39 - ERROR - [f7d858d3-2aa9-419b-8876-9fa6c707b362] An error occurred during processing: [Errno 2] No such file or directory: 'outputs/Qwen2-VL-2B-Instruct-AWQ/20250818_221804.json' +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/local_deploy/infer.py", line 110, in video_inference + with open(os.path.join(OUTPUT_DIR, f"{start_time}.json"), "w") as f: + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +FileNotFoundError: [Errno 2] No such file or directory: 'outputs/Qwen2-VL-2B-Instruct-AWQ/20250818_221804.json' +2025-08-18 22:18:39 - INFO - [f7d858d3-2aa9-419b-8876-9fa6c707b362] Cleaned up temporary file: temp_videos/f7d858d3-2aa9-419b-8876-9fa6c707b362.mp4 +2025-08-18 22:18:39 - INFO - [f7d858d3-2aa9-419b-8876-9fa6c707b362] Cleaned up temporary frame directory: temp_videos/f7d858d3-2aa9-419b-8876-9fa6c707b362 diff --git a/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_222505.log b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_222505.log new file mode 100644 index 0000000000000000000000000000000000000000..d416c7940b144f9ff86714778669a36c04a834a6 --- /dev/null +++ b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_222505.log @@ -0,0 +1,18 @@ +2025-08-18 22:25:05 - INFO - Loading model: Qwen/Qwen2-VL-2B-Instruct-AWQ +2025-08-18 22:25:08 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). 
+2025-08-18 22:25:15 - INFO - Model loaded in 10.72 seconds +2025-08-18 22:25:15 - INFO - GPU Memory Usage after model load: 2.31 GB +2025-08-18 22:25:32 - INFO - [a4aa5634-1f05-4a10-a409-f6f99576382b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 22:25:32 - INFO - [a4aa5634-1f05-4a10-a409-f6f99576382b] Video saved to temporary file: temp_videos/a4aa5634-1f05-4a10-a409-f6f99576382b.mp4 +2025-08-18 22:25:32 - INFO - [a4aa5634-1f05-4a10-a409-f6f99576382b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:25:36 - INFO - [a4aa5634-1f05-4a10-a409-f6f99576382b] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 22:25:36 - INFO - [a4aa5634-1f05-4a10-a409-f6f99576382b] 30 frames saved to temp_videos/a4aa5634-1f05-4a10-a409-f6f99576382b +2025-08-18 22:25:37 - INFO - Prompt token length: 2276 +2025-08-18 22:25:47 - ERROR - [a4aa5634-1f05-4a10-a409-f6f99576382b] An error occurred during processing: 'avg_gpu_memory_mb' +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/local_deploy/infer.py", line 109, in video_inference + logging.info(f"Tokens per second: {output['tokens_per_second']}, Avg GPU memory MB: {output['avg_gpu_memory_mb']}") + ~~~~~~^^^^^^^^^^^^^^^^^^^^^ +KeyError: 'avg_gpu_memory_mb' +2025-08-18 22:25:47 - INFO - [a4aa5634-1f05-4a10-a409-f6f99576382b] Cleaned up temporary file: temp_videos/a4aa5634-1f05-4a10-a409-f6f99576382b.mp4 +2025-08-18 22:25:47 - INFO - [a4aa5634-1f05-4a10-a409-f6f99576382b] Cleaned up temporary frame directory: temp_videos/a4aa5634-1f05-4a10-a409-f6f99576382b diff --git a/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_222617.log b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_222617.log new file mode 100644 index 0000000000000000000000000000000000000000..b96443b696a5185fe14efebe974cd62934613038 --- /dev/null +++ b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_222617.log @@ -0,0 
+1,13 @@ +2025-08-18 22:26:17 - INFO - Loading model: Qwen/Qwen2-VL-2B-Instruct-AWQ +2025-08-18 22:26:20 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-18 22:26:28 - INFO - Model loaded in 10.99 seconds +2025-08-18 22:26:28 - INFO - GPU Memory Usage after model load: 2.31 GB +2025-08-18 22:26:32 - INFO - [27d85b80-1b2f-42eb-9084-a747364133e1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 22:26:32 - INFO - [27d85b80-1b2f-42eb-9084-a747364133e1] Video saved to temporary file: temp_videos/27d85b80-1b2f-42eb-9084-a747364133e1.mp4 +2025-08-18 22:26:32 - INFO - [27d85b80-1b2f-42eb-9084-a747364133e1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:26:36 - INFO - [27d85b80-1b2f-42eb-9084-a747364133e1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:26:36 - INFO - [27d85b80-1b2f-42eb-9084-a747364133e1] 30 frames saved to temp_videos/27d85b80-1b2f-42eb-9084-a747364133e1 +2025-08-18 22:26:37 - INFO - Prompt token length: 2276 +2025-08-18 22:26:48 - INFO - Tokens per second: 8.544413217338054, Peak GPU memory MB: 4498.375 +2025-08-18 22:26:48 - INFO - [27d85b80-1b2f-42eb-9084-a747364133e1] Cleaned up temporary file: temp_videos/27d85b80-1b2f-42eb-9084-a747364133e1.mp4 +2025-08-18 22:26:48 - INFO - [27d85b80-1b2f-42eb-9084-a747364133e1] Cleaned up temporary frame directory: temp_videos/27d85b80-1b2f-42eb-9084-a747364133e1 diff --git a/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_223141.log b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_223141.log new file mode 100644 index 0000000000000000000000000000000000000000..8403d4f8cc3d1d0d6edaee8f476d9df46e576ed7 --- /dev/null +++ b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_223141.log @@ -0,0 +1,14 @@ +2025-08-18 22:31:41 - INFO - Loading model: Qwen/Qwen2-VL-2B-Instruct-AWQ +2025-08-18 22:31:44 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-18 22:31:53 - INFO - Model loaded in 12.48 seconds +2025-08-18 22:31:53 - INFO - GPU Memory Usage after model load: 2.31 GB +2025-08-18 22:32:49 - INFO - [a0e31fc7-179a-419d-b6eb-a6f05bc2a73f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 22:32:49 - INFO - [a0e31fc7-179a-419d-b6eb-a6f05bc2a73f] Video saved to temporary file: temp_videos/a0e31fc7-179a-419d-b6eb-a6f05bc2a73f.mp4 +2025-08-18 22:32:49 - INFO - [a0e31fc7-179a-419d-b6eb-a6f05bc2a73f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:32:53 - INFO - [a0e31fc7-179a-419d-b6eb-a6f05bc2a73f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:32:53 - INFO - [a0e31fc7-179a-419d-b6eb-a6f05bc2a73f] 30 frames saved to temp_videos/a0e31fc7-179a-419d-b6eb-a6f05bc2a73f +2025-08-18 22:32:54 - INFO - Prompt token length: 2276 +2025-08-18 22:33:04 - INFO - Tokens per second: 9.100198479728341, Peak GPU memory MB: 4498.375 +2025-08-18 22:33:04 - INFO - [a0e31fc7-179a-419d-b6eb-a6f05bc2a73f] Inference time: 14.89 seconds, CPU usage: 0.0%, CPU core utilization: [0.0, 0.0, 0.0, 0.0] +2025-08-18 22:33:04 - INFO - [a0e31fc7-179a-419d-b6eb-a6f05bc2a73f] Cleaned up temporary file: temp_videos/a0e31fc7-179a-419d-b6eb-a6f05bc2a73f.mp4 +2025-08-18 22:33:04 - INFO - [a0e31fc7-179a-419d-b6eb-a6f05bc2a73f] Cleaned up temporary frame directory: temp_videos/a0e31fc7-179a-419d-b6eb-a6f05bc2a73f diff --git a/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_223603.log b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_223603.log new file mode 100644 index 0000000000000000000000000000000000000000..d020916ad7ff0d183cc716984e66584143253abf --- /dev/null +++ b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_223603.log @@ -0,0 +1,14 @@ +2025-08-18 22:36:03 - INFO - Loading model: Qwen/Qwen2-VL-2B-Instruct-AWQ +2025-08-18 22:36:05 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-18 22:36:13 - INFO - Model loaded in 10.72 seconds +2025-08-18 22:36:13 - INFO - GPU Memory Usage after model load: 2.31 GB +2025-08-18 22:36:17 - INFO - [6cf28ab6-d63f-482a-849e-5b626233e7dd] Received new video inference request. 
Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 22:36:17 - INFO - [6cf28ab6-d63f-482a-849e-5b626233e7dd] Video saved to temporary file: temp_videos/6cf28ab6-d63f-482a-849e-5b626233e7dd.mp4 +2025-08-18 22:36:17 - INFO - [6cf28ab6-d63f-482a-849e-5b626233e7dd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:36:21 - INFO - [6cf28ab6-d63f-482a-849e-5b626233e7dd] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 22:36:21 - INFO - [6cf28ab6-d63f-482a-849e-5b626233e7dd] 30 frames saved to temp_videos/6cf28ab6-d63f-482a-849e-5b626233e7dd +2025-08-18 22:36:21 - INFO - Prompt token length: 2276 +2025-08-18 22:36:32 - INFO - Tokens per second: 9.058665203909582, Peak GPU memory MB: 4498.375 +2025-08-18 22:36:32 - INFO - [6cf28ab6-d63f-482a-849e-5b626233e7dd] Inference time: 14.38 seconds, CPU usage: 64.5%, CPU core utilization: [61.5, 64.5, 60.3, 71.7] +2025-08-18 22:36:32 - INFO - [6cf28ab6-d63f-482a-849e-5b626233e7dd] Cleaned up temporary file: temp_videos/6cf28ab6-d63f-482a-849e-5b626233e7dd.mp4 +2025-08-18 22:36:32 - INFO - [6cf28ab6-d63f-482a-849e-5b626233e7dd] Cleaned up temporary frame directory: temp_videos/6cf28ab6-d63f-482a-849e-5b626233e7dd diff --git a/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_224148.log b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_224148.log new file mode 100644 index 0000000000000000000000000000000000000000..8b3fb9ebc4dae92692fa6f7c1d35ace2bf9f4a3e --- /dev/null +++ b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_224148.log @@ -0,0 +1,14 @@ +2025-08-18 22:41:48 - INFO - Loading model: Qwen/Qwen2-VL-2B-Instruct-AWQ +2025-08-18 22:41:51 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). 
+2025-08-18 22:41:59 - INFO - Model loaded in 10.83 seconds +2025-08-18 22:41:59 - INFO - GPU Memory Usage after model load: 2.31 GB +2025-08-18 22:42:01 - INFO - [7d67f1c8-a6a2-41b3-88da-3f3ddf85842f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 22:42:01 - INFO - [7d67f1c8-a6a2-41b3-88da-3f3ddf85842f] Video saved to temporary file: temp_videos/7d67f1c8-a6a2-41b3-88da-3f3ddf85842f.mp4 +2025-08-18 22:42:01 - INFO - [7d67f1c8-a6a2-41b3-88da-3f3ddf85842f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:42:05 - INFO - [7d67f1c8-a6a2-41b3-88da-3f3ddf85842f] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 22:42:06 - INFO - [7d67f1c8-a6a2-41b3-88da-3f3ddf85842f] 30 frames saved to temp_videos/7d67f1c8-a6a2-41b3-88da-3f3ddf85842f +2025-08-18 22:42:06 - INFO - Prompt token length: 2276 +2025-08-18 22:42:17 - INFO - Tokens per second: 8.892949630488515, Peak GPU memory MB: 4498.375 +2025-08-18 22:42:17 - INFO - [7d67f1c8-a6a2-41b3-88da-3f3ddf85842f] Inference time: 15.41 seconds, CPU usage: 79.7%, CPU core utilization: [80.4, 78.0, 77.4, 82.9] +2025-08-18 22:42:17 - INFO - [7d67f1c8-a6a2-41b3-88da-3f3ddf85842f] Cleaned up temporary file: temp_videos/7d67f1c8-a6a2-41b3-88da-3f3ddf85842f.mp4 +2025-08-18 22:42:17 - INFO - [7d67f1c8-a6a2-41b3-88da-3f3ddf85842f] Cleaned up temporary frame directory: temp_videos/7d67f1c8-a6a2-41b3-88da-3f3ddf85842f diff --git a/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_224556.log b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_224556.log new file mode 100644 index 0000000000000000000000000000000000000000..0dba63faacd60e2c494439d9d98e03321d0c0fdd --- /dev/null +++ b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250818_224556.log @@ -0,0 +1,1414 @@ +2025-08-18 22:45:56 - INFO - Loading model: Qwen/Qwen2-VL-2B-Instruct-AWQ +2025-08-18 22:45:58 - INFO - We will use 90% of the memory on device 0 for 
storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-18 22:46:07 - INFO - Model loaded in 11.84 seconds +2025-08-18 22:46:07 - INFO - GPU Memory Usage after model load: 2369.47 MB +2025-08-18 22:46:12 - INFO - [075bda2f-0861-4df1-a4c3-f1a5c2043fa7] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 22:46:12 - INFO - [075bda2f-0861-4df1-a4c3-f1a5c2043fa7] Video saved to temporary file: temp_videos/075bda2f-0861-4df1-a4c3-f1a5c2043fa7.mp4 +2025-08-18 22:46:12 - INFO - [075bda2f-0861-4df1-a4c3-f1a5c2043fa7] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:46:15 - INFO - [075bda2f-0861-4df1-a4c3-f1a5c2043fa7] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 22:46:16 - INFO - [075bda2f-0861-4df1-a4c3-f1a5c2043fa7] 30 frames saved to temp_videos/075bda2f-0861-4df1-a4c3-f1a5c2043fa7 +2025-08-18 22:46:16 - INFO - Prompt token length: 2276 +2025-08-18 22:46:27 - INFO - Tokens per second: 8.63232791462771, Peak GPU memory MB: 4498.375 +2025-08-18 22:46:27 - INFO - [075bda2f-0861-4df1-a4c3-f1a5c2043fa7] Inference time: 14.99 seconds, CPU usage: 62.9%, CPU core utilization: [55.4, 63.5, 66.5, 66.0] +2025-08-18 22:46:27 - INFO - [075bda2f-0861-4df1-a4c3-f1a5c2043fa7] Cleaned up temporary file: temp_videos/075bda2f-0861-4df1-a4c3-f1a5c2043fa7.mp4 +2025-08-18 22:46:27 - INFO - [075bda2f-0861-4df1-a4c3-f1a5c2043fa7] Cleaned up temporary frame directory: temp_videos/075bda2f-0861-4df1-a4c3-f1a5c2043fa7 +2025-08-18 22:46:27 - INFO - [5dfba7a1-1f6b-4bd9-b477-006bbc95e576] Received new video inference request. 
Prompt: 'Please describe the video.', Video: 'messi_part_002.mp4' +2025-08-18 22:46:27 - INFO - [5dfba7a1-1f6b-4bd9-b477-006bbc95e576] Video saved to temporary file: temp_videos/5dfba7a1-1f6b-4bd9-b477-006bbc95e576.mp4 +2025-08-18 22:46:27 - INFO - [5dfba7a1-1f6b-4bd9-b477-006bbc95e576] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:46:32 - INFO - [5dfba7a1-1f6b-4bd9-b477-006bbc95e576] Extracted 30 frames successfully. Saving to temporary files... +2025-08-18 22:46:33 - INFO - [5dfba7a1-1f6b-4bd9-b477-006bbc95e576] 30 frames saved to temp_videos/5dfba7a1-1f6b-4bd9-b477-006bbc95e576 +2025-08-18 22:46:33 - INFO - Prompt token length: 2276 +2025-08-18 22:46:41 - INFO - Tokens per second: 11.221055325865173, Peak GPU memory MB: 4498.375 +2025-08-18 22:46:41 - INFO - [5dfba7a1-1f6b-4bd9-b477-006bbc95e576] Inference time: 13.72 seconds, CPU usage: 55.2%, CPU core utilization: [40.0, 47.2, 91.9, 41.7] +2025-08-18 22:46:41 - INFO - [5dfba7a1-1f6b-4bd9-b477-006bbc95e576] Cleaned up temporary file: temp_videos/5dfba7a1-1f6b-4bd9-b477-006bbc95e576.mp4 +2025-08-18 22:46:41 - INFO - [5dfba7a1-1f6b-4bd9-b477-006bbc95e576] Cleaned up temporary frame directory: temp_videos/5dfba7a1-1f6b-4bd9-b477-006bbc95e576 +2025-08-18 22:46:41 - INFO - [a73b6783-5330-48ed-9e0f-bb31ba877a9c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_003.mp4' +2025-08-18 22:46:41 - INFO - [a73b6783-5330-48ed-9e0f-bb31ba877a9c] Video saved to temporary file: temp_videos/a73b6783-5330-48ed-9e0f-bb31ba877a9c.mp4 +2025-08-18 22:46:41 - INFO - [a73b6783-5330-48ed-9e0f-bb31ba877a9c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:46:46 - INFO - [a73b6783-5330-48ed-9e0f-bb31ba877a9c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:46:46 - INFO - [a73b6783-5330-48ed-9e0f-bb31ba877a9c] 30 frames saved to temp_videos/a73b6783-5330-48ed-9e0f-bb31ba877a9c +2025-08-18 22:46:47 - INFO - Prompt token length: 2276 +2025-08-18 22:46:53 - INFO - Tokens per second: 9.905211987818262, Peak GPU memory MB: 4498.375 +2025-08-18 22:46:53 - INFO - [a73b6783-5330-48ed-9e0f-bb31ba877a9c] Inference time: 11.70 seconds, CPU usage: 57.9%, CPU core utilization: [79.6, 46.9, 59.2, 45.7] +2025-08-18 22:46:53 - INFO - [a73b6783-5330-48ed-9e0f-bb31ba877a9c] Cleaned up temporary file: temp_videos/a73b6783-5330-48ed-9e0f-bb31ba877a9c.mp4 +2025-08-18 22:46:53 - INFO - [a73b6783-5330-48ed-9e0f-bb31ba877a9c] Cleaned up temporary frame directory: temp_videos/a73b6783-5330-48ed-9e0f-bb31ba877a9c +2025-08-18 22:46:53 - INFO - [a484506a-eb35-4334-ab2d-9b3b4ac09de3] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_004.mp4' +2025-08-18 22:46:53 - INFO - [a484506a-eb35-4334-ab2d-9b3b4ac09de3] Video saved to temporary file: temp_videos/a484506a-eb35-4334-ab2d-9b3b4ac09de3.mp4 +2025-08-18 22:46:53 - INFO - [a484506a-eb35-4334-ab2d-9b3b4ac09de3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:46:58 - INFO - [a484506a-eb35-4334-ab2d-9b3b4ac09de3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:46:58 - INFO - [a484506a-eb35-4334-ab2d-9b3b4ac09de3] 30 frames saved to temp_videos/a484506a-eb35-4334-ab2d-9b3b4ac09de3 +2025-08-18 22:46:58 - INFO - Prompt token length: 2276 +2025-08-18 22:47:05 - INFO - Tokens per second: 10.499316897571724, Peak GPU memory MB: 4498.375 +2025-08-18 22:47:05 - INFO - [a484506a-eb35-4334-ab2d-9b3b4ac09de3] Inference time: 12.50 seconds, CPU usage: 54.7%, CPU core utilization: [40.6, 88.9, 46.3, 43.0] +2025-08-18 22:47:05 - INFO - [a484506a-eb35-4334-ab2d-9b3b4ac09de3] Cleaned up temporary file: temp_videos/a484506a-eb35-4334-ab2d-9b3b4ac09de3.mp4 +2025-08-18 22:47:05 - INFO - [a484506a-eb35-4334-ab2d-9b3b4ac09de3] Cleaned up temporary frame directory: temp_videos/a484506a-eb35-4334-ab2d-9b3b4ac09de3 +2025-08-18 22:47:05 - INFO - [c8faf576-22d8-4d4c-92ed-3b957bdd9469] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_005.mp4' +2025-08-18 22:47:05 - INFO - [c8faf576-22d8-4d4c-92ed-3b957bdd9469] Video saved to temporary file: temp_videos/c8faf576-22d8-4d4c-92ed-3b957bdd9469.mp4 +2025-08-18 22:47:05 - INFO - [c8faf576-22d8-4d4c-92ed-3b957bdd9469] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:47:10 - INFO - [c8faf576-22d8-4d4c-92ed-3b957bdd9469] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:47:11 - INFO - [c8faf576-22d8-4d4c-92ed-3b957bdd9469] 30 frames saved to temp_videos/c8faf576-22d8-4d4c-92ed-3b957bdd9469 +2025-08-18 22:47:11 - INFO - Prompt token length: 2276 +2025-08-18 22:47:17 - INFO - Tokens per second: 10.14404021542223, Peak GPU memory MB: 4498.375 +2025-08-18 22:47:17 - INFO - [c8faf576-22d8-4d4c-92ed-3b957bdd9469] Inference time: 12.04 seconds, CPU usage: 58.1%, CPU core utilization: [96.0, 48.8, 43.1, 44.4] +2025-08-18 22:47:17 - INFO - [c8faf576-22d8-4d4c-92ed-3b957bdd9469] Cleaned up temporary file: temp_videos/c8faf576-22d8-4d4c-92ed-3b957bdd9469.mp4 +2025-08-18 22:47:17 - INFO - [c8faf576-22d8-4d4c-92ed-3b957bdd9469] Cleaned up temporary frame directory: temp_videos/c8faf576-22d8-4d4c-92ed-3b957bdd9469 +2025-08-18 22:47:17 - INFO - [80b0511c-c198-4a01-9b50-64b7d4cb8a77] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_006.mp4' +2025-08-18 22:47:17 - INFO - [80b0511c-c198-4a01-9b50-64b7d4cb8a77] Video saved to temporary file: temp_videos/80b0511c-c198-4a01-9b50-64b7d4cb8a77.mp4 +2025-08-18 22:47:17 - INFO - [80b0511c-c198-4a01-9b50-64b7d4cb8a77] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:47:22 - INFO - [80b0511c-c198-4a01-9b50-64b7d4cb8a77] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:47:23 - INFO - [80b0511c-c198-4a01-9b50-64b7d4cb8a77] 30 frames saved to temp_videos/80b0511c-c198-4a01-9b50-64b7d4cb8a77 +2025-08-18 22:47:23 - INFO - Prompt token length: 2276 +2025-08-18 22:47:30 - INFO - Tokens per second: 10.486728250956414, Peak GPU memory MB: 4498.375 +2025-08-18 22:47:30 - INFO - [80b0511c-c198-4a01-9b50-64b7d4cb8a77] Inference time: 12.43 seconds, CPU usage: 56.6%, CPU core utilization: [43.1, 42.8, 89.8, 50.6] +2025-08-18 22:47:30 - INFO - [80b0511c-c198-4a01-9b50-64b7d4cb8a77] Cleaned up temporary file: temp_videos/80b0511c-c198-4a01-9b50-64b7d4cb8a77.mp4 +2025-08-18 22:47:30 - INFO - [80b0511c-c198-4a01-9b50-64b7d4cb8a77] Cleaned up temporary frame directory: temp_videos/80b0511c-c198-4a01-9b50-64b7d4cb8a77 +2025-08-18 22:47:30 - INFO - [350781fa-fdcd-49b9-ad16-cc460380dd85] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_007.mp4' +2025-08-18 22:47:30 - INFO - [350781fa-fdcd-49b9-ad16-cc460380dd85] Video saved to temporary file: temp_videos/350781fa-fdcd-49b9-ad16-cc460380dd85.mp4 +2025-08-18 22:47:30 - INFO - [350781fa-fdcd-49b9-ad16-cc460380dd85] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:47:37 - INFO - [350781fa-fdcd-49b9-ad16-cc460380dd85] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:47:37 - INFO - [350781fa-fdcd-49b9-ad16-cc460380dd85] 30 frames saved to temp_videos/350781fa-fdcd-49b9-ad16-cc460380dd85 +2025-08-18 22:47:37 - INFO - Prompt token length: 2276 +2025-08-18 22:47:49 - INFO - Tokens per second: 11.410479416659124, Peak GPU memory MB: 4498.375 +2025-08-18 22:47:49 - INFO - [350781fa-fdcd-49b9-ad16-cc460380dd85] Inference time: 19.68 seconds, CPU usage: 64.9%, CPU core utilization: [72.0, 58.3, 56.2, 73.1] +2025-08-18 22:47:49 - INFO - [350781fa-fdcd-49b9-ad16-cc460380dd85] Cleaned up temporary file: temp_videos/350781fa-fdcd-49b9-ad16-cc460380dd85.mp4 +2025-08-18 22:47:49 - INFO - [350781fa-fdcd-49b9-ad16-cc460380dd85] Cleaned up temporary frame directory: temp_videos/350781fa-fdcd-49b9-ad16-cc460380dd85 +2025-08-18 22:47:49 - INFO - [6a816775-7c8c-40e3-9a4e-44e96029aaf1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_008.mp4' +2025-08-18 22:47:49 - INFO - [6a816775-7c8c-40e3-9a4e-44e96029aaf1] Video saved to temporary file: temp_videos/6a816775-7c8c-40e3-9a4e-44e96029aaf1.mp4 +2025-08-18 22:47:49 - INFO - [6a816775-7c8c-40e3-9a4e-44e96029aaf1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:47:55 - INFO - [6a816775-7c8c-40e3-9a4e-44e96029aaf1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:47:55 - INFO - [6a816775-7c8c-40e3-9a4e-44e96029aaf1] 30 frames saved to temp_videos/6a816775-7c8c-40e3-9a4e-44e96029aaf1 +2025-08-18 22:47:55 - INFO - Prompt token length: 2276 +2025-08-18 22:48:14 - INFO - Tokens per second: 13.557291314513737, Peak GPU memory MB: 4498.375 +2025-08-18 22:48:14 - INFO - [6a816775-7c8c-40e3-9a4e-44e96029aaf1] Inference time: 24.19 seconds, CPU usage: 42.2%, CPU core utilization: [62.8, 27.4, 52.1, 26.5] +2025-08-18 22:48:14 - INFO - [6a816775-7c8c-40e3-9a4e-44e96029aaf1] Cleaned up temporary file: temp_videos/6a816775-7c8c-40e3-9a4e-44e96029aaf1.mp4 +2025-08-18 22:48:14 - INFO - [6a816775-7c8c-40e3-9a4e-44e96029aaf1] Cleaned up temporary frame directory: temp_videos/6a816775-7c8c-40e3-9a4e-44e96029aaf1 +2025-08-18 22:48:14 - INFO - [8c272ac0-0fcc-4ae2-8091-2feee8ef6994] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_009.mp4' +2025-08-18 22:48:14 - INFO - [8c272ac0-0fcc-4ae2-8091-2feee8ef6994] Video saved to temporary file: temp_videos/8c272ac0-0fcc-4ae2-8091-2feee8ef6994.mp4 +2025-08-18 22:48:14 - INFO - [8c272ac0-0fcc-4ae2-8091-2feee8ef6994] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:48:21 - INFO - [8c272ac0-0fcc-4ae2-8091-2feee8ef6994] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:48:22 - INFO - [8c272ac0-0fcc-4ae2-8091-2feee8ef6994] 30 frames saved to temp_videos/8c272ac0-0fcc-4ae2-8091-2feee8ef6994 +2025-08-18 22:48:22 - INFO - Prompt token length: 2276 +2025-08-18 22:48:30 - INFO - Tokens per second: 9.70410623359028, Peak GPU memory MB: 4498.375 +2025-08-18 22:48:30 - INFO - [8c272ac0-0fcc-4ae2-8091-2feee8ef6994] Inference time: 16.81 seconds, CPU usage: 84.7%, CPU core utilization: [74.6, 85.3, 94.7, 84.1] +2025-08-18 22:48:31 - INFO - [8c272ac0-0fcc-4ae2-8091-2feee8ef6994] Cleaned up temporary file: temp_videos/8c272ac0-0fcc-4ae2-8091-2feee8ef6994.mp4 +2025-08-18 22:48:31 - INFO - [8c272ac0-0fcc-4ae2-8091-2feee8ef6994] Cleaned up temporary frame directory: temp_videos/8c272ac0-0fcc-4ae2-8091-2feee8ef6994 +2025-08-18 22:48:31 - INFO - [fe507c08-75ee-4256-97be-8d8240483bc2] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_010.mp4' +2025-08-18 22:48:31 - INFO - [fe507c08-75ee-4256-97be-8d8240483bc2] Video saved to temporary file: temp_videos/fe507c08-75ee-4256-97be-8d8240483bc2.mp4 +2025-08-18 22:48:31 - INFO - [fe507c08-75ee-4256-97be-8d8240483bc2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:48:41 - INFO - [fe507c08-75ee-4256-97be-8d8240483bc2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:48:41 - INFO - [fe507c08-75ee-4256-97be-8d8240483bc2] 30 frames saved to temp_videos/fe507c08-75ee-4256-97be-8d8240483bc2 +2025-08-18 22:48:42 - INFO - Prompt token length: 2276 +2025-08-18 22:48:53 - INFO - Tokens per second: 11.67231814743956, Peak GPU memory MB: 4498.375 +2025-08-18 22:48:53 - INFO - [fe507c08-75ee-4256-97be-8d8240483bc2] Inference time: 22.01 seconds, CPU usage: 69.1%, CPU core utilization: [78.4, 59.3, 77.0, 61.8] +2025-08-18 22:48:53 - INFO - [fe507c08-75ee-4256-97be-8d8240483bc2] Cleaned up temporary file: temp_videos/fe507c08-75ee-4256-97be-8d8240483bc2.mp4 +2025-08-18 22:48:53 - INFO - [fe507c08-75ee-4256-97be-8d8240483bc2] Cleaned up temporary frame directory: temp_videos/fe507c08-75ee-4256-97be-8d8240483bc2 +2025-08-18 22:48:53 - INFO - [fc606044-d1d2-4eba-9137-1c30c3e7ef03] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_011.mp4' +2025-08-18 22:48:53 - INFO - [fc606044-d1d2-4eba-9137-1c30c3e7ef03] Video saved to temporary file: temp_videos/fc606044-d1d2-4eba-9137-1c30c3e7ef03.mp4 +2025-08-18 22:48:53 - INFO - [fc606044-d1d2-4eba-9137-1c30c3e7ef03] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:49:05 - INFO - [fc606044-d1d2-4eba-9137-1c30c3e7ef03] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:49:05 - INFO - [fc606044-d1d2-4eba-9137-1c30c3e7ef03] 30 frames saved to temp_videos/fc606044-d1d2-4eba-9137-1c30c3e7ef03 +2025-08-18 22:49:06 - INFO - Prompt token length: 2276 +2025-08-18 22:49:19 - INFO - Tokens per second: 8.777664788366206, Peak GPU memory MB: 4498.375 +2025-08-18 22:49:19 - INFO - [fc606044-d1d2-4eba-9137-1c30c3e7ef03] Inference time: 26.16 seconds, CPU usage: 96.7%, CPU core utilization: [96.2, 97.1, 96.3, 97.3] +2025-08-18 22:49:19 - INFO - [fc606044-d1d2-4eba-9137-1c30c3e7ef03] Cleaned up temporary file: temp_videos/fc606044-d1d2-4eba-9137-1c30c3e7ef03.mp4 +2025-08-18 22:49:19 - INFO - [fc606044-d1d2-4eba-9137-1c30c3e7ef03] Cleaned up temporary frame directory: temp_videos/fc606044-d1d2-4eba-9137-1c30c3e7ef03 +2025-08-18 22:49:19 - INFO - [435b0218-2980-4933-b072-761122b930a7] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_012.mp4' +2025-08-18 22:49:19 - INFO - [435b0218-2980-4933-b072-761122b930a7] Video saved to temporary file: temp_videos/435b0218-2980-4933-b072-761122b930a7.mp4 +2025-08-18 22:49:19 - INFO - [435b0218-2980-4933-b072-761122b930a7] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:49:29 - INFO - [435b0218-2980-4933-b072-761122b930a7] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:49:29 - INFO - [435b0218-2980-4933-b072-761122b930a7] 30 frames saved to temp_videos/435b0218-2980-4933-b072-761122b930a7 +2025-08-18 22:49:30 - INFO - Prompt token length: 2276 +2025-08-18 22:49:38 - INFO - Tokens per second: 9.52422650849717, Peak GPU memory MB: 4498.375 +2025-08-18 22:49:38 - INFO - [435b0218-2980-4933-b072-761122b930a7] Inference time: 18.76 seconds, CPU usage: 85.4%, CPU core utilization: [81.5, 84.7, 88.4, 86.7] +2025-08-18 22:49:38 - INFO - [435b0218-2980-4933-b072-761122b930a7] Cleaned up temporary file: temp_videos/435b0218-2980-4933-b072-761122b930a7.mp4 +2025-08-18 22:49:38 - INFO - [435b0218-2980-4933-b072-761122b930a7] Cleaned up temporary frame directory: temp_videos/435b0218-2980-4933-b072-761122b930a7 +2025-08-18 22:49:38 - INFO - [664241e4-f4dd-46d0-836e-383061784629] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_013.mp4' +2025-08-18 22:49:38 - INFO - [664241e4-f4dd-46d0-836e-383061784629] Video saved to temporary file: temp_videos/664241e4-f4dd-46d0-836e-383061784629.mp4 +2025-08-18 22:49:38 - INFO - [664241e4-f4dd-46d0-836e-383061784629] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:49:43 - INFO - [664241e4-f4dd-46d0-836e-383061784629] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:49:43 - INFO - [664241e4-f4dd-46d0-836e-383061784629] 30 frames saved to temp_videos/664241e4-f4dd-46d0-836e-383061784629 +2025-08-18 22:49:43 - INFO - Prompt token length: 2276 +2025-08-18 22:49:50 - INFO - Tokens per second: 10.458143876674457, Peak GPU memory MB: 4498.375 +2025-08-18 22:49:50 - INFO - [664241e4-f4dd-46d0-836e-383061784629] Inference time: 12.46 seconds, CPU usage: 57.1%, CPU core utilization: [44.0, 81.0, 48.2, 55.0] +2025-08-18 22:49:50 - INFO - [664241e4-f4dd-46d0-836e-383061784629] Cleaned up temporary file: temp_videos/664241e4-f4dd-46d0-836e-383061784629.mp4 +2025-08-18 22:49:50 - INFO - [664241e4-f4dd-46d0-836e-383061784629] Cleaned up temporary frame directory: temp_videos/664241e4-f4dd-46d0-836e-383061784629 +2025-08-18 22:49:50 - INFO - [c88b94a9-9488-4175-a650-dec5a8b21b8a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_014.mp4' +2025-08-18 22:49:50 - INFO - [c88b94a9-9488-4175-a650-dec5a8b21b8a] Video saved to temporary file: temp_videos/c88b94a9-9488-4175-a650-dec5a8b21b8a.mp4 +2025-08-18 22:49:50 - INFO - [c88b94a9-9488-4175-a650-dec5a8b21b8a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:49:55 - INFO - [c88b94a9-9488-4175-a650-dec5a8b21b8a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:49:55 - INFO - [c88b94a9-9488-4175-a650-dec5a8b21b8a] 30 frames saved to temp_videos/c88b94a9-9488-4175-a650-dec5a8b21b8a +2025-08-18 22:49:55 - INFO - Prompt token length: 2276 +2025-08-18 22:50:03 - INFO - Tokens per second: 11.021376017625661, Peak GPU memory MB: 4498.375 +2025-08-18 22:50:03 - INFO - [c88b94a9-9488-4175-a650-dec5a8b21b8a] Inference time: 12.68 seconds, CPU usage: 53.1%, CPU core utilization: [51.2, 46.7, 78.5, 36.4] +2025-08-18 22:50:03 - INFO - [c88b94a9-9488-4175-a650-dec5a8b21b8a] Cleaned up temporary file: temp_videos/c88b94a9-9488-4175-a650-dec5a8b21b8a.mp4 +2025-08-18 22:50:03 - INFO - [c88b94a9-9488-4175-a650-dec5a8b21b8a] Cleaned up temporary frame directory: temp_videos/c88b94a9-9488-4175-a650-dec5a8b21b8a +2025-08-18 22:50:03 - INFO - [5c899150-18ab-4cba-a79a-751c212374f8] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_015.mp4' +2025-08-18 22:50:03 - INFO - [5c899150-18ab-4cba-a79a-751c212374f8] Video saved to temporary file: temp_videos/5c899150-18ab-4cba-a79a-751c212374f8.mp4 +2025-08-18 22:50:03 - INFO - [5c899150-18ab-4cba-a79a-751c212374f8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:50:08 - INFO - [5c899150-18ab-4cba-a79a-751c212374f8] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:50:08 - INFO - [5c899150-18ab-4cba-a79a-751c212374f8] 30 frames saved to temp_videos/5c899150-18ab-4cba-a79a-751c212374f8 +2025-08-18 22:50:09 - INFO - Prompt token length: 2276 +2025-08-18 22:50:17 - INFO - Tokens per second: 11.233124536609099, Peak GPU memory MB: 4498.375 +2025-08-18 22:50:17 - INFO - [5c899150-18ab-4cba-a79a-751c212374f8] Inference time: 13.79 seconds, CPU usage: 54.9%, CPU core utilization: [53.5, 52.3, 67.9, 45.9] +2025-08-18 22:50:17 - INFO - [5c899150-18ab-4cba-a79a-751c212374f8] Cleaned up temporary file: temp_videos/5c899150-18ab-4cba-a79a-751c212374f8.mp4 +2025-08-18 22:50:17 - INFO - [5c899150-18ab-4cba-a79a-751c212374f8] Cleaned up temporary frame directory: temp_videos/5c899150-18ab-4cba-a79a-751c212374f8 +2025-08-18 22:50:17 - INFO - [ab528d2a-bc83-4257-bf9f-ebfb9c47bfa9] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_016.mp4' +2025-08-18 22:50:17 - INFO - [ab528d2a-bc83-4257-bf9f-ebfb9c47bfa9] Video saved to temporary file: temp_videos/ab528d2a-bc83-4257-bf9f-ebfb9c47bfa9.mp4 +2025-08-18 22:50:17 - INFO - [ab528d2a-bc83-4257-bf9f-ebfb9c47bfa9] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:50:21 - INFO - [ab528d2a-bc83-4257-bf9f-ebfb9c47bfa9] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:50:21 - INFO - [ab528d2a-bc83-4257-bf9f-ebfb9c47bfa9] 30 frames saved to temp_videos/ab528d2a-bc83-4257-bf9f-ebfb9c47bfa9 +2025-08-18 22:50:22 - INFO - Prompt token length: 2276 +2025-08-18 22:50:29 - INFO - Tokens per second: 11.011243446779766, Peak GPU memory MB: 4498.375 +2025-08-18 22:50:29 - INFO - [ab528d2a-bc83-4257-bf9f-ebfb9c47bfa9] Inference time: 12.41 seconds, CPU usage: 51.2%, CPU core utilization: [33.8, 74.0, 58.5, 38.3] +2025-08-18 22:50:29 - INFO - [ab528d2a-bc83-4257-bf9f-ebfb9c47bfa9] Cleaned up temporary file: temp_videos/ab528d2a-bc83-4257-bf9f-ebfb9c47bfa9.mp4 +2025-08-18 22:50:29 - INFO - [ab528d2a-bc83-4257-bf9f-ebfb9c47bfa9] Cleaned up temporary frame directory: temp_videos/ab528d2a-bc83-4257-bf9f-ebfb9c47bfa9 +2025-08-18 22:50:29 - INFO - [60125b02-d360-48c0-a0a5-a95d75617096] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_017.mp4' +2025-08-18 22:50:29 - INFO - [60125b02-d360-48c0-a0a5-a95d75617096] Video saved to temporary file: temp_videos/60125b02-d360-48c0-a0a5-a95d75617096.mp4 +2025-08-18 22:50:29 - INFO - [60125b02-d360-48c0-a0a5-a95d75617096] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:50:35 - INFO - [60125b02-d360-48c0-a0a5-a95d75617096] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:50:35 - INFO - [60125b02-d360-48c0-a0a5-a95d75617096] 30 frames saved to temp_videos/60125b02-d360-48c0-a0a5-a95d75617096 +2025-08-18 22:50:36 - INFO - Prompt token length: 2276 +2025-08-18 22:50:54 - INFO - Tokens per second: 13.381462815949803, Peak GPU memory MB: 4498.375 +2025-08-18 22:50:54 - INFO - [60125b02-d360-48c0-a0a5-a95d75617096] Inference time: 24.94 seconds, CPU usage: 46.1%, CPU core utilization: [30.6, 30.3, 31.0, 92.3] +2025-08-18 22:50:54 - INFO - [60125b02-d360-48c0-a0a5-a95d75617096] Cleaned up temporary file: temp_videos/60125b02-d360-48c0-a0a5-a95d75617096.mp4 +2025-08-18 22:50:54 - INFO - [60125b02-d360-48c0-a0a5-a95d75617096] Cleaned up temporary frame directory: temp_videos/60125b02-d360-48c0-a0a5-a95d75617096 +2025-08-18 22:50:54 - INFO - [426c70a1-950a-43e3-a6a9-a2c6f1c28bfb] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_018.mp4' +2025-08-18 22:50:54 - INFO - [426c70a1-950a-43e3-a6a9-a2c6f1c28bfb] Video saved to temporary file: temp_videos/426c70a1-950a-43e3-a6a9-a2c6f1c28bfb.mp4 +2025-08-18 22:50:54 - INFO - [426c70a1-950a-43e3-a6a9-a2c6f1c28bfb] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:50:59 - INFO - [426c70a1-950a-43e3-a6a9-a2c6f1c28bfb] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:50:59 - INFO - [426c70a1-950a-43e3-a6a9-a2c6f1c28bfb] 30 frames saved to temp_videos/426c70a1-950a-43e3-a6a9-a2c6f1c28bfb +2025-08-18 22:51:00 - INFO - Prompt token length: 2276 +2025-08-18 22:51:19 - INFO - Tokens per second: 13.132389275625835, Peak GPU memory MB: 4498.375 +2025-08-18 22:51:19 - INFO - [426c70a1-950a-43e3-a6a9-a2c6f1c28bfb] Inference time: 24.58 seconds, CPU usage: 47.9%, CPU core utilization: [38.8, 32.2, 37.2, 83.5] +2025-08-18 22:51:19 - INFO - [426c70a1-950a-43e3-a6a9-a2c6f1c28bfb] Cleaned up temporary file: temp_videos/426c70a1-950a-43e3-a6a9-a2c6f1c28bfb.mp4 +2025-08-18 22:51:19 - INFO - [426c70a1-950a-43e3-a6a9-a2c6f1c28bfb] Cleaned up temporary frame directory: temp_videos/426c70a1-950a-43e3-a6a9-a2c6f1c28bfb +2025-08-18 22:51:19 - INFO - [109dd4ed-ffb9-4b81-8053-2fa832f05e49] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_019.mp4' +2025-08-18 22:51:19 - INFO - [109dd4ed-ffb9-4b81-8053-2fa832f05e49] Video saved to temporary file: temp_videos/109dd4ed-ffb9-4b81-8053-2fa832f05e49.mp4 +2025-08-18 22:51:19 - INFO - [109dd4ed-ffb9-4b81-8053-2fa832f05e49] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:51:25 - INFO - [109dd4ed-ffb9-4b81-8053-2fa832f05e49] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:51:25 - INFO - [109dd4ed-ffb9-4b81-8053-2fa832f05e49] 30 frames saved to temp_videos/109dd4ed-ffb9-4b81-8053-2fa832f05e49 +2025-08-18 22:51:25 - INFO - Prompt token length: 2276 +2025-08-18 22:51:31 - INFO - Tokens per second: 9.361486629055515, Peak GPU memory MB: 4498.375 +2025-08-18 22:51:31 - INFO - [109dd4ed-ffb9-4b81-8053-2fa832f05e49] Inference time: 11.82 seconds, CPU usage: 59.5%, CPU core utilization: [80.9, 50.5, 53.4, 53.4] +2025-08-18 22:51:31 - INFO - [109dd4ed-ffb9-4b81-8053-2fa832f05e49] Cleaned up temporary file: temp_videos/109dd4ed-ffb9-4b81-8053-2fa832f05e49.mp4 +2025-08-18 22:51:31 - INFO - [109dd4ed-ffb9-4b81-8053-2fa832f05e49] Cleaned up temporary frame directory: temp_videos/109dd4ed-ffb9-4b81-8053-2fa832f05e49 +2025-08-18 22:51:31 - INFO - [5260ac0f-2fcf-48dd-a03f-d5c943c4e758] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_020.mp4' +2025-08-18 22:51:31 - INFO - [5260ac0f-2fcf-48dd-a03f-d5c943c4e758] Video saved to temporary file: temp_videos/5260ac0f-2fcf-48dd-a03f-d5c943c4e758.mp4 +2025-08-18 22:51:31 - INFO - [5260ac0f-2fcf-48dd-a03f-d5c943c4e758] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:51:36 - INFO - [5260ac0f-2fcf-48dd-a03f-d5c943c4e758] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:51:36 - INFO - [5260ac0f-2fcf-48dd-a03f-d5c943c4e758] 30 frames saved to temp_videos/5260ac0f-2fcf-48dd-a03f-d5c943c4e758 +2025-08-18 22:51:36 - INFO - Prompt token length: 2276 +2025-08-18 22:51:43 - INFO - Tokens per second: 10.609050771309763, Peak GPU memory MB: 4498.375 +2025-08-18 22:51:43 - INFO - [5260ac0f-2fcf-48dd-a03f-d5c943c4e758] Inference time: 12.48 seconds, CPU usage: 54.3%, CPU core utilization: [43.7, 54.5, 44.5, 74.7] +2025-08-18 22:51:43 - INFO - [5260ac0f-2fcf-48dd-a03f-d5c943c4e758] Cleaned up temporary file: temp_videos/5260ac0f-2fcf-48dd-a03f-d5c943c4e758.mp4 +2025-08-18 22:51:43 - INFO - [5260ac0f-2fcf-48dd-a03f-d5c943c4e758] Cleaned up temporary frame directory: temp_videos/5260ac0f-2fcf-48dd-a03f-d5c943c4e758 +2025-08-18 22:51:43 - INFO - [accfb971-034b-4c14-809b-daf433dbfbb8] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_021.mp4' +2025-08-18 22:51:43 - INFO - [accfb971-034b-4c14-809b-daf433dbfbb8] Video saved to temporary file: temp_videos/accfb971-034b-4c14-809b-daf433dbfbb8.mp4 +2025-08-18 22:51:43 - INFO - [accfb971-034b-4c14-809b-daf433dbfbb8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:51:49 - INFO - [accfb971-034b-4c14-809b-daf433dbfbb8] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:51:49 - INFO - [accfb971-034b-4c14-809b-daf433dbfbb8] 30 frames saved to temp_videos/accfb971-034b-4c14-809b-daf433dbfbb8 +2025-08-18 22:51:49 - INFO - Prompt token length: 2276 +2025-08-18 22:52:08 - INFO - Tokens per second: 13.340801969764685, Peak GPU memory MB: 4498.375 +2025-08-18 22:52:08 - INFO - [accfb971-034b-4c14-809b-daf433dbfbb8] Inference time: 24.81 seconds, CPU usage: 45.7%, CPU core utilization: [51.6, 35.5, 27.5, 68.1] +2025-08-18 22:52:08 - INFO - [accfb971-034b-4c14-809b-daf433dbfbb8] Cleaned up temporary file: temp_videos/accfb971-034b-4c14-809b-daf433dbfbb8.mp4 +2025-08-18 22:52:08 - INFO - [accfb971-034b-4c14-809b-daf433dbfbb8] Cleaned up temporary frame directory: temp_videos/accfb971-034b-4c14-809b-daf433dbfbb8 +2025-08-18 22:52:08 - INFO - [b2836d31-8645-4311-9b90-888c14e5e3bc] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_022.mp4' +2025-08-18 22:52:08 - INFO - [b2836d31-8645-4311-9b90-888c14e5e3bc] Video saved to temporary file: temp_videos/b2836d31-8645-4311-9b90-888c14e5e3bc.mp4 +2025-08-18 22:52:08 - INFO - [b2836d31-8645-4311-9b90-888c14e5e3bc] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:52:13 - INFO - [b2836d31-8645-4311-9b90-888c14e5e3bc] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:52:13 - INFO - [b2836d31-8645-4311-9b90-888c14e5e3bc] 30 frames saved to temp_videos/b2836d31-8645-4311-9b90-888c14e5e3bc +2025-08-18 22:52:14 - INFO - Prompt token length: 2276 +2025-08-18 22:52:32 - INFO - Tokens per second: 13.607361563257028, Peak GPU memory MB: 4498.375 +2025-08-18 22:52:32 - INFO - [b2836d31-8645-4311-9b90-888c14e5e3bc] Inference time: 24.13 seconds, CPU usage: 42.3%, CPU core utilization: [59.0, 29.3, 51.7, 29.4] +2025-08-18 22:52:32 - INFO - [b2836d31-8645-4311-9b90-888c14e5e3bc] Cleaned up temporary file: temp_videos/b2836d31-8645-4311-9b90-888c14e5e3bc.mp4 +2025-08-18 22:52:32 - INFO - [b2836d31-8645-4311-9b90-888c14e5e3bc] Cleaned up temporary frame directory: temp_videos/b2836d31-8645-4311-9b90-888c14e5e3bc +2025-08-18 22:52:32 - INFO - [b0803029-6b9a-4e0d-9511-87bf9871abcc] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_023.mp4' +2025-08-18 22:52:32 - INFO - [b0803029-6b9a-4e0d-9511-87bf9871abcc] Video saved to temporary file: temp_videos/b0803029-6b9a-4e0d-9511-87bf9871abcc.mp4 +2025-08-18 22:52:32 - INFO - [b0803029-6b9a-4e0d-9511-87bf9871abcc] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:52:38 - INFO - [b0803029-6b9a-4e0d-9511-87bf9871abcc] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:52:38 - INFO - [b0803029-6b9a-4e0d-9511-87bf9871abcc] 30 frames saved to temp_videos/b0803029-6b9a-4e0d-9511-87bf9871abcc +2025-08-18 22:52:38 - INFO - Prompt token length: 2276 +2025-08-18 22:52:48 - INFO - Tokens per second: 11.106636903428864, Peak GPU memory MB: 4498.375 +2025-08-18 22:52:48 - INFO - [b0803029-6b9a-4e0d-9511-87bf9871abcc] Inference time: 15.97 seconds, CPU usage: 65.9%, CPU core utilization: [55.3, 59.6, 60.6, 87.8] +2025-08-18 22:52:48 - INFO - [b0803029-6b9a-4e0d-9511-87bf9871abcc] Cleaned up temporary file: temp_videos/b0803029-6b9a-4e0d-9511-87bf9871abcc.mp4 +2025-08-18 22:52:48 - INFO - [b0803029-6b9a-4e0d-9511-87bf9871abcc] Cleaned up temporary frame directory: temp_videos/b0803029-6b9a-4e0d-9511-87bf9871abcc +2025-08-18 22:52:48 - INFO - [78840bfa-ea30-4212-8d30-e30b757eb356] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_024.mp4' +2025-08-18 22:52:48 - INFO - [78840bfa-ea30-4212-8d30-e30b757eb356] Video saved to temporary file: temp_videos/78840bfa-ea30-4212-8d30-e30b757eb356.mp4 +2025-08-18 22:52:48 - INFO - [78840bfa-ea30-4212-8d30-e30b757eb356] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:52:53 - INFO - [78840bfa-ea30-4212-8d30-e30b757eb356] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:52:53 - INFO - [78840bfa-ea30-4212-8d30-e30b757eb356] 30 frames saved to temp_videos/78840bfa-ea30-4212-8d30-e30b757eb356 +2025-08-18 22:52:54 - INFO - Prompt token length: 2276 +2025-08-18 22:53:02 - INFO - Tokens per second: 11.590113251233504, Peak GPU memory MB: 4498.375 +2025-08-18 22:53:02 - INFO - [78840bfa-ea30-4212-8d30-e30b757eb356] Inference time: 13.96 seconds, CPU usage: 52.1%, CPU core utilization: [44.3, 43.5, 79.6, 40.9] +2025-08-18 22:53:02 - INFO - [78840bfa-ea30-4212-8d30-e30b757eb356] Cleaned up temporary file: temp_videos/78840bfa-ea30-4212-8d30-e30b757eb356.mp4 +2025-08-18 22:53:02 - INFO - [78840bfa-ea30-4212-8d30-e30b757eb356] Cleaned up temporary frame directory: temp_videos/78840bfa-ea30-4212-8d30-e30b757eb356 +2025-08-18 22:53:02 - INFO - [5022338d-3173-4d0b-8b39-c5ae03c77f1f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_025.mp4' +2025-08-18 22:53:02 - INFO - [5022338d-3173-4d0b-8b39-c5ae03c77f1f] Video saved to temporary file: temp_videos/5022338d-3173-4d0b-8b39-c5ae03c77f1f.mp4 +2025-08-18 22:53:02 - INFO - [5022338d-3173-4d0b-8b39-c5ae03c77f1f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:53:08 - INFO - [5022338d-3173-4d0b-8b39-c5ae03c77f1f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:53:08 - INFO - [5022338d-3173-4d0b-8b39-c5ae03c77f1f] 30 frames saved to temp_videos/5022338d-3173-4d0b-8b39-c5ae03c77f1f +2025-08-18 22:53:08 - INFO - Prompt token length: 2276 +2025-08-18 22:53:14 - INFO - Tokens per second: 10.033342008096056, Peak GPU memory MB: 4498.375 +2025-08-18 22:53:14 - INFO - [5022338d-3173-4d0b-8b39-c5ae03c77f1f] Inference time: 12.19 seconds, CPU usage: 58.1%, CPU core utilization: [49.8, 47.9, 44.3, 90.4] +2025-08-18 22:53:14 - INFO - [5022338d-3173-4d0b-8b39-c5ae03c77f1f] Cleaned up temporary file: temp_videos/5022338d-3173-4d0b-8b39-c5ae03c77f1f.mp4 +2025-08-18 22:53:14 - INFO - [5022338d-3173-4d0b-8b39-c5ae03c77f1f] Cleaned up temporary frame directory: temp_videos/5022338d-3173-4d0b-8b39-c5ae03c77f1f +2025-08-18 22:53:14 - INFO - [19f73586-9c57-4a25-bff3-ecc9a54a2151] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_026.mp4' +2025-08-18 22:53:14 - INFO - [19f73586-9c57-4a25-bff3-ecc9a54a2151] Video saved to temporary file: temp_videos/19f73586-9c57-4a25-bff3-ecc9a54a2151.mp4 +2025-08-18 22:53:14 - INFO - [19f73586-9c57-4a25-bff3-ecc9a54a2151] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:53:20 - INFO - [19f73586-9c57-4a25-bff3-ecc9a54a2151] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:53:20 - INFO - [19f73586-9c57-4a25-bff3-ecc9a54a2151] 30 frames saved to temp_videos/19f73586-9c57-4a25-bff3-ecc9a54a2151 +2025-08-18 22:53:20 - INFO - Prompt token length: 2276 +2025-08-18 22:53:29 - INFO - Tokens per second: 11.728915252837204, Peak GPU memory MB: 4498.375 +2025-08-18 22:53:29 - INFO - [19f73586-9c57-4a25-bff3-ecc9a54a2151] Inference time: 14.87 seconds, CPU usage: 51.6%, CPU core utilization: [47.1, 61.9, 47.1, 50.4] +2025-08-18 22:53:29 - INFO - [19f73586-9c57-4a25-bff3-ecc9a54a2151] Cleaned up temporary file: temp_videos/19f73586-9c57-4a25-bff3-ecc9a54a2151.mp4 +2025-08-18 22:53:29 - INFO - [19f73586-9c57-4a25-bff3-ecc9a54a2151] Cleaned up temporary frame directory: temp_videos/19f73586-9c57-4a25-bff3-ecc9a54a2151 +2025-08-18 22:53:29 - INFO - [ecc5736c-a500-4670-97f5-2c7c84068d5c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_027.mp4' +2025-08-18 22:53:29 - INFO - [ecc5736c-a500-4670-97f5-2c7c84068d5c] Video saved to temporary file: temp_videos/ecc5736c-a500-4670-97f5-2c7c84068d5c.mp4 +2025-08-18 22:53:29 - INFO - [ecc5736c-a500-4670-97f5-2c7c84068d5c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:53:34 - INFO - [ecc5736c-a500-4670-97f5-2c7c84068d5c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:53:34 - INFO - [ecc5736c-a500-4670-97f5-2c7c84068d5c] 30 frames saved to temp_videos/ecc5736c-a500-4670-97f5-2c7c84068d5c +2025-08-18 22:53:35 - INFO - Prompt token length: 2276 +2025-08-18 22:53:45 - INFO - Tokens per second: 12.103293187102274, Peak GPU memory MB: 4498.375 +2025-08-18 22:53:45 - INFO - [ecc5736c-a500-4670-97f5-2c7c84068d5c] Inference time: 15.81 seconds, CPU usage: 47.9%, CPU core utilization: [41.1, 89.1, 33.4, 27.8] +2025-08-18 22:53:45 - INFO - [ecc5736c-a500-4670-97f5-2c7c84068d5c] Cleaned up temporary file: temp_videos/ecc5736c-a500-4670-97f5-2c7c84068d5c.mp4 +2025-08-18 22:53:45 - INFO - [ecc5736c-a500-4670-97f5-2c7c84068d5c] Cleaned up temporary frame directory: temp_videos/ecc5736c-a500-4670-97f5-2c7c84068d5c +2025-08-18 22:53:45 - INFO - [31d523f9-edb8-42ba-a6a9-577444e3d7a5] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_028.mp4' +2025-08-18 22:53:45 - INFO - [31d523f9-edb8-42ba-a6a9-577444e3d7a5] Video saved to temporary file: temp_videos/31d523f9-edb8-42ba-a6a9-577444e3d7a5.mp4 +2025-08-18 22:53:45 - INFO - [31d523f9-edb8-42ba-a6a9-577444e3d7a5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:53:50 - INFO - [31d523f9-edb8-42ba-a6a9-577444e3d7a5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:53:51 - INFO - [31d523f9-edb8-42ba-a6a9-577444e3d7a5] 30 frames saved to temp_videos/31d523f9-edb8-42ba-a6a9-577444e3d7a5 +2025-08-18 22:53:51 - INFO - Prompt token length: 2276 +2025-08-18 22:53:59 - INFO - Tokens per second: 11.55616894095496, Peak GPU memory MB: 4498.375 +2025-08-18 22:53:59 - INFO - [31d523f9-edb8-42ba-a6a9-577444e3d7a5] Inference time: 14.27 seconds, CPU usage: 53.3%, CPU core utilization: [53.1, 37.3, 77.1, 45.7] +2025-08-18 22:53:59 - INFO - [31d523f9-edb8-42ba-a6a9-577444e3d7a5] Cleaned up temporary file: temp_videos/31d523f9-edb8-42ba-a6a9-577444e3d7a5.mp4 +2025-08-18 22:53:59 - INFO - [31d523f9-edb8-42ba-a6a9-577444e3d7a5] Cleaned up temporary frame directory: temp_videos/31d523f9-edb8-42ba-a6a9-577444e3d7a5 +2025-08-18 22:54:00 - INFO - [16ba2ba0-02d8-45b0-a45a-439f87dc04aa] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_029.mp4' +2025-08-18 22:54:00 - INFO - [16ba2ba0-02d8-45b0-a45a-439f87dc04aa] Video saved to temporary file: temp_videos/16ba2ba0-02d8-45b0-a45a-439f87dc04aa.mp4 +2025-08-18 22:54:00 - INFO - [16ba2ba0-02d8-45b0-a45a-439f87dc04aa] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:54:05 - INFO - [16ba2ba0-02d8-45b0-a45a-439f87dc04aa] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:54:05 - INFO - [16ba2ba0-02d8-45b0-a45a-439f87dc04aa] 30 frames saved to temp_videos/16ba2ba0-02d8-45b0-a45a-439f87dc04aa +2025-08-18 22:54:05 - INFO - Prompt token length: 2276 +2025-08-18 22:54:12 - INFO - Tokens per second: 10.1662890855494, Peak GPU memory MB: 4498.375 +2025-08-18 22:54:12 - INFO - [16ba2ba0-02d8-45b0-a45a-439f87dc04aa] Inference time: 12.25 seconds, CPU usage: 58.0%, CPU core utilization: [46.4, 67.0, 46.7, 71.7] +2025-08-18 22:54:12 - INFO - [16ba2ba0-02d8-45b0-a45a-439f87dc04aa] Cleaned up temporary file: temp_videos/16ba2ba0-02d8-45b0-a45a-439f87dc04aa.mp4 +2025-08-18 22:54:12 - INFO - [16ba2ba0-02d8-45b0-a45a-439f87dc04aa] Cleaned up temporary frame directory: temp_videos/16ba2ba0-02d8-45b0-a45a-439f87dc04aa +2025-08-18 22:54:12 - INFO - [fe59bbbc-ba47-4676-8cd7-6fd88c752075] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_030.mp4' +2025-08-18 22:54:12 - INFO - [fe59bbbc-ba47-4676-8cd7-6fd88c752075] Video saved to temporary file: temp_videos/fe59bbbc-ba47-4676-8cd7-6fd88c752075.mp4 +2025-08-18 22:54:12 - INFO - [fe59bbbc-ba47-4676-8cd7-6fd88c752075] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:54:18 - INFO - [fe59bbbc-ba47-4676-8cd7-6fd88c752075] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:54:18 - INFO - [fe59bbbc-ba47-4676-8cd7-6fd88c752075] 30 frames saved to temp_videos/fe59bbbc-ba47-4676-8cd7-6fd88c752075 +2025-08-18 22:54:18 - INFO - Prompt token length: 2276 +2025-08-18 22:54:25 - INFO - Tokens per second: 10.61298795535298, Peak GPU memory MB: 4498.375 +2025-08-18 22:54:25 - INFO - [fe59bbbc-ba47-4676-8cd7-6fd88c752075] Inference time: 13.25 seconds, CPU usage: 57.6%, CPU core utilization: [65.5, 77.1, 44.8, 43.3] +2025-08-18 22:54:25 - INFO - [fe59bbbc-ba47-4676-8cd7-6fd88c752075] Cleaned up temporary file: temp_videos/fe59bbbc-ba47-4676-8cd7-6fd88c752075.mp4 +2025-08-18 22:54:25 - INFO - [fe59bbbc-ba47-4676-8cd7-6fd88c752075] Cleaned up temporary frame directory: temp_videos/fe59bbbc-ba47-4676-8cd7-6fd88c752075 +2025-08-18 22:54:25 - INFO - [df7e5a2b-938d-4cb0-91fd-cc7b435c6b11] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_031.mp4' +2025-08-18 22:54:25 - INFO - [df7e5a2b-938d-4cb0-91fd-cc7b435c6b11] Video saved to temporary file: temp_videos/df7e5a2b-938d-4cb0-91fd-cc7b435c6b11.mp4 +2025-08-18 22:54:25 - INFO - [df7e5a2b-938d-4cb0-91fd-cc7b435c6b11] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:54:30 - INFO - [df7e5a2b-938d-4cb0-91fd-cc7b435c6b11] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:54:31 - INFO - [df7e5a2b-938d-4cb0-91fd-cc7b435c6b11] 30 frames saved to temp_videos/df7e5a2b-938d-4cb0-91fd-cc7b435c6b11 +2025-08-18 22:54:31 - INFO - Prompt token length: 2276 +2025-08-18 22:54:38 - INFO - Tokens per second: 10.523895968014092, Peak GPU memory MB: 4498.375 +2025-08-18 22:54:38 - INFO - [df7e5a2b-938d-4cb0-91fd-cc7b435c6b11] Inference time: 12.69 seconds, CPU usage: 56.1%, CPU core utilization: [39.5, 45.7, 92.6, 46.4] +2025-08-18 22:54:38 - INFO - [df7e5a2b-938d-4cb0-91fd-cc7b435c6b11] Cleaned up temporary file: temp_videos/df7e5a2b-938d-4cb0-91fd-cc7b435c6b11.mp4 +2025-08-18 22:54:38 - INFO - [df7e5a2b-938d-4cb0-91fd-cc7b435c6b11] Cleaned up temporary frame directory: temp_videos/df7e5a2b-938d-4cb0-91fd-cc7b435c6b11 +2025-08-18 22:54:38 - INFO - [65f43bb3-e8f2-4eb6-aaa3-89b1e4b66d28] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_032.mp4' +2025-08-18 22:54:38 - INFO - [65f43bb3-e8f2-4eb6-aaa3-89b1e4b66d28] Video saved to temporary file: temp_videos/65f43bb3-e8f2-4eb6-aaa3-89b1e4b66d28.mp4 +2025-08-18 22:54:38 - INFO - [65f43bb3-e8f2-4eb6-aaa3-89b1e4b66d28] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:54:43 - INFO - [65f43bb3-e8f2-4eb6-aaa3-89b1e4b66d28] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:54:44 - INFO - [65f43bb3-e8f2-4eb6-aaa3-89b1e4b66d28] 30 frames saved to temp_videos/65f43bb3-e8f2-4eb6-aaa3-89b1e4b66d28 +2025-08-18 22:54:44 - INFO - Prompt token length: 2276 +2025-08-18 22:54:51 - INFO - Tokens per second: 10.668539079165651, Peak GPU memory MB: 4498.375 +2025-08-18 22:54:51 - INFO - [65f43bb3-e8f2-4eb6-aaa3-89b1e4b66d28] Inference time: 13.17 seconds, CPU usage: 56.8%, CPU core utilization: [43.8, 81.1, 46.1, 56.2] +2025-08-18 22:54:51 - INFO - [65f43bb3-e8f2-4eb6-aaa3-89b1e4b66d28] Cleaned up temporary file: temp_videos/65f43bb3-e8f2-4eb6-aaa3-89b1e4b66d28.mp4 +2025-08-18 22:54:51 - INFO - [65f43bb3-e8f2-4eb6-aaa3-89b1e4b66d28] Cleaned up temporary frame directory: temp_videos/65f43bb3-e8f2-4eb6-aaa3-89b1e4b66d28 +2025-08-18 22:54:51 - INFO - [c3623f82-9912-42a6-8cb3-d157dfe0d1ce] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_033.mp4' +2025-08-18 22:54:51 - INFO - [c3623f82-9912-42a6-8cb3-d157dfe0d1ce] Video saved to temporary file: temp_videos/c3623f82-9912-42a6-8cb3-d157dfe0d1ce.mp4 +2025-08-18 22:54:51 - INFO - [c3623f82-9912-42a6-8cb3-d157dfe0d1ce] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:54:56 - INFO - [c3623f82-9912-42a6-8cb3-d157dfe0d1ce] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:54:56 - INFO - [c3623f82-9912-42a6-8cb3-d157dfe0d1ce] 30 frames saved to temp_videos/c3623f82-9912-42a6-8cb3-d157dfe0d1ce +2025-08-18 22:54:57 - INFO - Prompt token length: 2276 +2025-08-18 22:55:15 - INFO - Tokens per second: 13.537389723016279, Peak GPU memory MB: 4498.375 +2025-08-18 22:55:15 - INFO - [c3623f82-9912-42a6-8cb3-d157dfe0d1ce] Inference time: 24.00 seconds, CPU usage: 41.7%, CPU core utilization: [31.5, 26.5, 85.6, 23.1] +2025-08-18 22:55:15 - INFO - [c3623f82-9912-42a6-8cb3-d157dfe0d1ce] Cleaned up temporary file: temp_videos/c3623f82-9912-42a6-8cb3-d157dfe0d1ce.mp4 +2025-08-18 22:55:15 - INFO - [c3623f82-9912-42a6-8cb3-d157dfe0d1ce] Cleaned up temporary frame directory: temp_videos/c3623f82-9912-42a6-8cb3-d157dfe0d1ce +2025-08-18 22:55:15 - INFO - [d948b84f-015d-4fb8-bde7-63dc8a399e73] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_034.mp4' +2025-08-18 22:55:15 - INFO - [d948b84f-015d-4fb8-bde7-63dc8a399e73] Video saved to temporary file: temp_videos/d948b84f-015d-4fb8-bde7-63dc8a399e73.mp4 +2025-08-18 22:55:15 - INFO - [d948b84f-015d-4fb8-bde7-63dc8a399e73] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:55:20 - INFO - [d948b84f-015d-4fb8-bde7-63dc8a399e73] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:55:21 - INFO - [d948b84f-015d-4fb8-bde7-63dc8a399e73] 30 frames saved to temp_videos/d948b84f-015d-4fb8-bde7-63dc8a399e73 +2025-08-18 22:55:21 - INFO - Prompt token length: 2276 +2025-08-18 22:55:39 - INFO - Tokens per second: 13.678692848425554, Peak GPU memory MB: 4498.375 +2025-08-18 22:55:39 - INFO - [d948b84f-015d-4fb8-bde7-63dc8a399e73] Inference time: 24.14 seconds, CPU usage: 42.8%, CPU core utilization: [61.6, 24.0, 56.6, 29.1] +2025-08-18 22:55:39 - INFO - [d948b84f-015d-4fb8-bde7-63dc8a399e73] Cleaned up temporary file: temp_videos/d948b84f-015d-4fb8-bde7-63dc8a399e73.mp4 +2025-08-18 22:55:39 - INFO - [d948b84f-015d-4fb8-bde7-63dc8a399e73] Cleaned up temporary frame directory: temp_videos/d948b84f-015d-4fb8-bde7-63dc8a399e73 +2025-08-18 22:55:39 - INFO - [fc53496d-7086-4b4d-802b-604559f57e61] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_035.mp4' +2025-08-18 22:55:39 - INFO - [fc53496d-7086-4b4d-802b-604559f57e61] Video saved to temporary file: temp_videos/fc53496d-7086-4b4d-802b-604559f57e61.mp4 +2025-08-18 22:55:39 - INFO - [fc53496d-7086-4b4d-802b-604559f57e61] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:55:44 - INFO - [fc53496d-7086-4b4d-802b-604559f57e61] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:55:44 - INFO - [fc53496d-7086-4b4d-802b-604559f57e61] 30 frames saved to temp_videos/fc53496d-7086-4b4d-802b-604559f57e61 +2025-08-18 22:55:45 - INFO - Prompt token length: 2276 +2025-08-18 22:56:03 - INFO - Tokens per second: 13.376309732887606, Peak GPU memory MB: 4498.375 +2025-08-18 22:56:03 - INFO - [fc53496d-7086-4b4d-802b-604559f57e61] Inference time: 23.93 seconds, CPU usage: 42.1%, CPU core utilization: [28.9, 48.0, 23.0, 68.5] +2025-08-18 22:56:03 - INFO - [fc53496d-7086-4b4d-802b-604559f57e61] Cleaned up temporary file: temp_videos/fc53496d-7086-4b4d-802b-604559f57e61.mp4 +2025-08-18 22:56:03 - INFO - [fc53496d-7086-4b4d-802b-604559f57e61] Cleaned up temporary frame directory: temp_videos/fc53496d-7086-4b4d-802b-604559f57e61 +2025-08-18 22:56:03 - INFO - [cd17d03e-624a-4214-b0cc-016302f516b5] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_036.mp4' +2025-08-18 22:56:03 - INFO - [cd17d03e-624a-4214-b0cc-016302f516b5] Video saved to temporary file: temp_videos/cd17d03e-624a-4214-b0cc-016302f516b5.mp4 +2025-08-18 22:56:03 - INFO - [cd17d03e-624a-4214-b0cc-016302f516b5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:56:08 - INFO - [cd17d03e-624a-4214-b0cc-016302f516b5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:56:09 - INFO - [cd17d03e-624a-4214-b0cc-016302f516b5] 30 frames saved to temp_videos/cd17d03e-624a-4214-b0cc-016302f516b5 +2025-08-18 22:56:09 - INFO - Prompt token length: 2276 +2025-08-18 22:56:27 - INFO - Tokens per second: 13.62507043215766, Peak GPU memory MB: 4498.375 +2025-08-18 22:56:27 - INFO - [cd17d03e-624a-4214-b0cc-016302f516b5] Inference time: 24.01 seconds, CPU usage: 42.7%, CPU core utilization: [46.8, 22.9, 71.1, 30.0] +2025-08-18 22:56:27 - INFO - [cd17d03e-624a-4214-b0cc-016302f516b5] Cleaned up temporary file: temp_videos/cd17d03e-624a-4214-b0cc-016302f516b5.mp4 +2025-08-18 22:56:27 - INFO - [cd17d03e-624a-4214-b0cc-016302f516b5] Cleaned up temporary frame directory: temp_videos/cd17d03e-624a-4214-b0cc-016302f516b5 +2025-08-18 22:56:27 - INFO - [6ace4d11-5e88-4f8b-a1f2-4ed9802584ab] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_037.mp4' +2025-08-18 22:56:27 - INFO - [6ace4d11-5e88-4f8b-a1f2-4ed9802584ab] Video saved to temporary file: temp_videos/6ace4d11-5e88-4f8b-a1f2-4ed9802584ab.mp4 +2025-08-18 22:56:27 - INFO - [6ace4d11-5e88-4f8b-a1f2-4ed9802584ab] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:56:33 - INFO - [6ace4d11-5e88-4f8b-a1f2-4ed9802584ab] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:56:33 - INFO - [6ace4d11-5e88-4f8b-a1f2-4ed9802584ab] 30 frames saved to temp_videos/6ace4d11-5e88-4f8b-a1f2-4ed9802584ab +2025-08-18 22:56:33 - INFO - Prompt token length: 2276 +2025-08-18 22:56:42 - INFO - Tokens per second: 11.276273877237807, Peak GPU memory MB: 4498.375 +2025-08-18 22:56:42 - INFO - [6ace4d11-5e88-4f8b-a1f2-4ed9802584ab] Inference time: 14.25 seconds, CPU usage: 52.6%, CPU core utilization: [40.0, 75.7, 38.8, 56.0] +2025-08-18 22:56:42 - INFO - [6ace4d11-5e88-4f8b-a1f2-4ed9802584ab] Cleaned up temporary file: temp_videos/6ace4d11-5e88-4f8b-a1f2-4ed9802584ab.mp4 +2025-08-18 22:56:42 - INFO - [6ace4d11-5e88-4f8b-a1f2-4ed9802584ab] Cleaned up temporary frame directory: temp_videos/6ace4d11-5e88-4f8b-a1f2-4ed9802584ab +2025-08-18 22:56:42 - INFO - [799ee65c-3f65-4208-aa08-e3e74c3eab46] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_038.mp4' +2025-08-18 22:56:42 - INFO - [799ee65c-3f65-4208-aa08-e3e74c3eab46] Video saved to temporary file: temp_videos/799ee65c-3f65-4208-aa08-e3e74c3eab46.mp4 +2025-08-18 22:56:42 - INFO - [799ee65c-3f65-4208-aa08-e3e74c3eab46] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:56:47 - INFO - [799ee65c-3f65-4208-aa08-e3e74c3eab46] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:56:47 - INFO - [799ee65c-3f65-4208-aa08-e3e74c3eab46] 30 frames saved to temp_videos/799ee65c-3f65-4208-aa08-e3e74c3eab46 +2025-08-18 22:56:47 - INFO - Prompt token length: 2276 +2025-08-18 22:57:06 - INFO - Tokens per second: 13.497013928697614, Peak GPU memory MB: 4498.375 +2025-08-18 22:57:06 - INFO - [799ee65c-3f65-4208-aa08-e3e74c3eab46] Inference time: 24.17 seconds, CPU usage: 43.0%, CPU core utilization: [26.9, 88.5, 26.0, 30.4] +2025-08-18 22:57:06 - INFO - [799ee65c-3f65-4208-aa08-e3e74c3eab46] Cleaned up temporary file: temp_videos/799ee65c-3f65-4208-aa08-e3e74c3eab46.mp4 +2025-08-18 22:57:06 - INFO - [799ee65c-3f65-4208-aa08-e3e74c3eab46] Cleaned up temporary frame directory: temp_videos/799ee65c-3f65-4208-aa08-e3e74c3eab46 +2025-08-18 22:57:06 - INFO - [6b17b052-e042-40fb-bd52-89ab8c8a5b14] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_039.mp4' +2025-08-18 22:57:06 - INFO - [6b17b052-e042-40fb-bd52-89ab8c8a5b14] Video saved to temporary file: temp_videos/6b17b052-e042-40fb-bd52-89ab8c8a5b14.mp4 +2025-08-18 22:57:06 - INFO - [6b17b052-e042-40fb-bd52-89ab8c8a5b14] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:57:12 - INFO - [6b17b052-e042-40fb-bd52-89ab8c8a5b14] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:57:12 - INFO - [6b17b052-e042-40fb-bd52-89ab8c8a5b14] 30 frames saved to temp_videos/6b17b052-e042-40fb-bd52-89ab8c8a5b14 +2025-08-18 22:57:12 - INFO - Prompt token length: 2276 +2025-08-18 22:57:19 - INFO - Tokens per second: 10.531527007366298, Peak GPU memory MB: 4498.375 +2025-08-18 22:57:19 - INFO - [6b17b052-e042-40fb-bd52-89ab8c8a5b14] Inference time: 13.09 seconds, CPU usage: 56.6%, CPU core utilization: [46.6, 92.1, 46.4, 41.0] +2025-08-18 22:57:19 - INFO - [6b17b052-e042-40fb-bd52-89ab8c8a5b14] Cleaned up temporary file: temp_videos/6b17b052-e042-40fb-bd52-89ab8c8a5b14.mp4 +2025-08-18 22:57:19 - INFO - [6b17b052-e042-40fb-bd52-89ab8c8a5b14] Cleaned up temporary frame directory: temp_videos/6b17b052-e042-40fb-bd52-89ab8c8a5b14 +2025-08-18 22:57:19 - INFO - [c7a0e52c-dc5a-4737-8588-3d4a61b2467a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_040.mp4' +2025-08-18 22:57:19 - INFO - [c7a0e52c-dc5a-4737-8588-3d4a61b2467a] Video saved to temporary file: temp_videos/c7a0e52c-dc5a-4737-8588-3d4a61b2467a.mp4 +2025-08-18 22:57:19 - INFO - [c7a0e52c-dc5a-4737-8588-3d4a61b2467a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:57:24 - INFO - [c7a0e52c-dc5a-4737-8588-3d4a61b2467a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:57:24 - INFO - [c7a0e52c-dc5a-4737-8588-3d4a61b2467a] 30 frames saved to temp_videos/c7a0e52c-dc5a-4737-8588-3d4a61b2467a +2025-08-18 22:57:24 - INFO - Prompt token length: 2276 +2025-08-18 22:57:43 - INFO - Tokens per second: 13.475227004241777, Peak GPU memory MB: 4498.375 +2025-08-18 22:57:43 - INFO - [c7a0e52c-dc5a-4737-8588-3d4a61b2467a] Inference time: 23.81 seconds, CPU usage: 42.2%, CPU core utilization: [29.1, 37.1, 22.2, 80.2] +2025-08-18 22:57:43 - INFO - [c7a0e52c-dc5a-4737-8588-3d4a61b2467a] Cleaned up temporary file: temp_videos/c7a0e52c-dc5a-4737-8588-3d4a61b2467a.mp4 +2025-08-18 22:57:43 - INFO - [c7a0e52c-dc5a-4737-8588-3d4a61b2467a] Cleaned up temporary frame directory: temp_videos/c7a0e52c-dc5a-4737-8588-3d4a61b2467a +2025-08-18 22:57:43 - INFO - [5e3f5136-347c-4889-a332-2a48852a1973] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_041.mp4' +2025-08-18 22:57:43 - INFO - [5e3f5136-347c-4889-a332-2a48852a1973] Video saved to temporary file: temp_videos/5e3f5136-347c-4889-a332-2a48852a1973.mp4 +2025-08-18 22:57:43 - INFO - [5e3f5136-347c-4889-a332-2a48852a1973] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:57:47 - INFO - [5e3f5136-347c-4889-a332-2a48852a1973] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:57:48 - INFO - [5e3f5136-347c-4889-a332-2a48852a1973] 30 frames saved to temp_videos/5e3f5136-347c-4889-a332-2a48852a1973 +2025-08-18 22:57:48 - INFO - Prompt token length: 2276 +2025-08-18 22:57:58 - INFO - Tokens per second: 11.916277351916051, Peak GPU memory MB: 4498.375 +2025-08-18 22:57:58 - INFO - [5e3f5136-347c-4889-a332-2a48852a1973] Inference time: 14.90 seconds, CPU usage: 47.7%, CPU core utilization: [44.8, 54.4, 56.9, 34.9] +2025-08-18 22:57:58 - INFO - [5e3f5136-347c-4889-a332-2a48852a1973] Cleaned up temporary file: temp_videos/5e3f5136-347c-4889-a332-2a48852a1973.mp4 +2025-08-18 22:57:58 - INFO - [5e3f5136-347c-4889-a332-2a48852a1973] Cleaned up temporary frame directory: temp_videos/5e3f5136-347c-4889-a332-2a48852a1973 +2025-08-18 22:57:58 - INFO - [e2dcfb3d-e7cc-4a08-acb3-90f7d5c15b76] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_042.mp4' +2025-08-18 22:57:58 - INFO - [e2dcfb3d-e7cc-4a08-acb3-90f7d5c15b76] Video saved to temporary file: temp_videos/e2dcfb3d-e7cc-4a08-acb3-90f7d5c15b76.mp4 +2025-08-18 22:57:58 - INFO - [e2dcfb3d-e7cc-4a08-acb3-90f7d5c15b76] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:58:04 - INFO - [e2dcfb3d-e7cc-4a08-acb3-90f7d5c15b76] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:58:04 - INFO - [e2dcfb3d-e7cc-4a08-acb3-90f7d5c15b76] 30 frames saved to temp_videos/e2dcfb3d-e7cc-4a08-acb3-90f7d5c15b76 +2025-08-18 22:58:04 - INFO - Prompt token length: 2276 +2025-08-18 22:58:13 - INFO - Tokens per second: 11.492664443775725, Peak GPU memory MB: 4498.375 +2025-08-18 22:58:13 - INFO - [e2dcfb3d-e7cc-4a08-acb3-90f7d5c15b76] Inference time: 14.74 seconds, CPU usage: 54.2%, CPU core utilization: [49.6, 46.7, 78.2, 42.4] +2025-08-18 22:58:13 - INFO - [e2dcfb3d-e7cc-4a08-acb3-90f7d5c15b76] Cleaned up temporary file: temp_videos/e2dcfb3d-e7cc-4a08-acb3-90f7d5c15b76.mp4 +2025-08-18 22:58:13 - INFO - [e2dcfb3d-e7cc-4a08-acb3-90f7d5c15b76] Cleaned up temporary frame directory: temp_videos/e2dcfb3d-e7cc-4a08-acb3-90f7d5c15b76 +2025-08-18 22:58:13 - INFO - [244000f7-3e28-4e49-b380-f138b203cef2] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_043.mp4' +2025-08-18 22:58:13 - INFO - [244000f7-3e28-4e49-b380-f138b203cef2] Video saved to temporary file: temp_videos/244000f7-3e28-4e49-b380-f138b203cef2.mp4 +2025-08-18 22:58:13 - INFO - [244000f7-3e28-4e49-b380-f138b203cef2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:58:18 - INFO - [244000f7-3e28-4e49-b380-f138b203cef2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:58:18 - INFO - [244000f7-3e28-4e49-b380-f138b203cef2] 30 frames saved to temp_videos/244000f7-3e28-4e49-b380-f138b203cef2 +2025-08-18 22:58:19 - INFO - Prompt token length: 2276 +2025-08-18 22:58:27 - INFO - Tokens per second: 11.568536648727322, Peak GPU memory MB: 4498.375 +2025-08-18 22:58:27 - INFO - [244000f7-3e28-4e49-b380-f138b203cef2] Inference time: 14.69 seconds, CPU usage: 53.3%, CPU core utilization: [58.4, 44.7, 72.1, 37.8] +2025-08-18 22:58:27 - INFO - [244000f7-3e28-4e49-b380-f138b203cef2] Cleaned up temporary file: temp_videos/244000f7-3e28-4e49-b380-f138b203cef2.mp4 +2025-08-18 22:58:27 - INFO - [244000f7-3e28-4e49-b380-f138b203cef2] Cleaned up temporary frame directory: temp_videos/244000f7-3e28-4e49-b380-f138b203cef2 +2025-08-18 22:58:27 - INFO - [d5f7483d-13fe-45b4-8c70-bb5ad2788078] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_044.mp4' +2025-08-18 22:58:27 - INFO - [d5f7483d-13fe-45b4-8c70-bb5ad2788078] Video saved to temporary file: temp_videos/d5f7483d-13fe-45b4-8c70-bb5ad2788078.mp4 +2025-08-18 22:58:27 - INFO - [d5f7483d-13fe-45b4-8c70-bb5ad2788078] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:58:32 - INFO - [d5f7483d-13fe-45b4-8c70-bb5ad2788078] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:58:32 - INFO - [d5f7483d-13fe-45b4-8c70-bb5ad2788078] 30 frames saved to temp_videos/d5f7483d-13fe-45b4-8c70-bb5ad2788078 +2025-08-18 22:58:33 - INFO - Prompt token length: 2276 +2025-08-18 22:58:43 - INFO - Tokens per second: 12.110009497452609, Peak GPU memory MB: 4498.375 +2025-08-18 22:58:43 - INFO - [d5f7483d-13fe-45b4-8c70-bb5ad2788078] Inference time: 15.44 seconds, CPU usage: 48.5%, CPU core utilization: [45.0, 46.6, 70.6, 31.8] +2025-08-18 22:58:43 - INFO - [d5f7483d-13fe-45b4-8c70-bb5ad2788078] Cleaned up temporary file: temp_videos/d5f7483d-13fe-45b4-8c70-bb5ad2788078.mp4 +2025-08-18 22:58:43 - INFO - [d5f7483d-13fe-45b4-8c70-bb5ad2788078] Cleaned up temporary frame directory: temp_videos/d5f7483d-13fe-45b4-8c70-bb5ad2788078 +2025-08-18 22:58:43 - INFO - [04a58573-7af6-4a7d-a187-f79e7d87ecd2] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_045.mp4' +2025-08-18 22:58:43 - INFO - [04a58573-7af6-4a7d-a187-f79e7d87ecd2] Video saved to temporary file: temp_videos/04a58573-7af6-4a7d-a187-f79e7d87ecd2.mp4 +2025-08-18 22:58:43 - INFO - [04a58573-7af6-4a7d-a187-f79e7d87ecd2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:58:47 - INFO - [04a58573-7af6-4a7d-a187-f79e7d87ecd2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:58:47 - INFO - [04a58573-7af6-4a7d-a187-f79e7d87ecd2] 30 frames saved to temp_videos/04a58573-7af6-4a7d-a187-f79e7d87ecd2 +2025-08-18 22:58:48 - INFO - Prompt token length: 2276 +2025-08-18 22:59:06 - INFO - Tokens per second: 13.585724473842552, Peak GPU memory MB: 4498.375 +2025-08-18 22:59:06 - INFO - [04a58573-7af6-4a7d-a187-f79e7d87ecd2] Inference time: 23.46 seconds, CPU usage: 42.3%, CPU core utilization: [23.5, 24.6, 93.4, 27.4] +2025-08-18 22:59:06 - INFO - [04a58573-7af6-4a7d-a187-f79e7d87ecd2] Cleaned up temporary file: temp_videos/04a58573-7af6-4a7d-a187-f79e7d87ecd2.mp4 +2025-08-18 22:59:06 - INFO - [04a58573-7af6-4a7d-a187-f79e7d87ecd2] Cleaned up temporary frame directory: temp_videos/04a58573-7af6-4a7d-a187-f79e7d87ecd2 +2025-08-18 22:59:06 - INFO - [51d14ff3-476b-402a-8025-b95f9074dcd6] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_046.mp4' +2025-08-18 22:59:06 - INFO - [51d14ff3-476b-402a-8025-b95f9074dcd6] Video saved to temporary file: temp_videos/51d14ff3-476b-402a-8025-b95f9074dcd6.mp4 +2025-08-18 22:59:06 - INFO - [51d14ff3-476b-402a-8025-b95f9074dcd6] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:59:12 - INFO - [51d14ff3-476b-402a-8025-b95f9074dcd6] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:59:12 - INFO - [51d14ff3-476b-402a-8025-b95f9074dcd6] 30 frames saved to temp_videos/51d14ff3-476b-402a-8025-b95f9074dcd6 +2025-08-18 22:59:12 - INFO - Prompt token length: 2276 +2025-08-18 22:59:20 - INFO - Tokens per second: 11.270841311476088, Peak GPU memory MB: 4498.375 +2025-08-18 22:59:20 - INFO - [51d14ff3-476b-402a-8025-b95f9074dcd6] Inference time: 13.94 seconds, CPU usage: 54.2%, CPU core utilization: [57.3, 52.0, 42.5, 64.9] +2025-08-18 22:59:20 - INFO - [51d14ff3-476b-402a-8025-b95f9074dcd6] Cleaned up temporary file: temp_videos/51d14ff3-476b-402a-8025-b95f9074dcd6.mp4 +2025-08-18 22:59:20 - INFO - [51d14ff3-476b-402a-8025-b95f9074dcd6] Cleaned up temporary frame directory: temp_videos/51d14ff3-476b-402a-8025-b95f9074dcd6 +2025-08-18 22:59:20 - INFO - [3d84cabe-4bf5-4ce4-a8ef-c898865d9e9a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_047.mp4' +2025-08-18 22:59:20 - INFO - [3d84cabe-4bf5-4ce4-a8ef-c898865d9e9a] Video saved to temporary file: temp_videos/3d84cabe-4bf5-4ce4-a8ef-c898865d9e9a.mp4 +2025-08-18 22:59:20 - INFO - [3d84cabe-4bf5-4ce4-a8ef-c898865d9e9a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:59:25 - INFO - [3d84cabe-4bf5-4ce4-a8ef-c898865d9e9a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:59:25 - INFO - [3d84cabe-4bf5-4ce4-a8ef-c898865d9e9a] 30 frames saved to temp_videos/3d84cabe-4bf5-4ce4-a8ef-c898865d9e9a +2025-08-18 22:59:26 - INFO - Prompt token length: 2276 +2025-08-18 22:59:34 - INFO - Tokens per second: 11.372159490189508, Peak GPU memory MB: 4498.375 +2025-08-18 22:59:34 - INFO - [3d84cabe-4bf5-4ce4-a8ef-c898865d9e9a] Inference time: 13.46 seconds, CPU usage: 52.4%, CPU core utilization: [53.7, 39.4, 78.4, 38.1] +2025-08-18 22:59:34 - INFO - [3d84cabe-4bf5-4ce4-a8ef-c898865d9e9a] Cleaned up temporary file: temp_videos/3d84cabe-4bf5-4ce4-a8ef-c898865d9e9a.mp4 +2025-08-18 22:59:34 - INFO - [3d84cabe-4bf5-4ce4-a8ef-c898865d9e9a] Cleaned up temporary frame directory: temp_videos/3d84cabe-4bf5-4ce4-a8ef-c898865d9e9a +2025-08-18 22:59:34 - INFO - [3b44f853-7199-409b-b9e5-bf43a71342ef] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_048.mp4' +2025-08-18 22:59:34 - INFO - [3b44f853-7199-409b-b9e5-bf43a71342ef] Video saved to temporary file: temp_videos/3b44f853-7199-409b-b9e5-bf43a71342ef.mp4 +2025-08-18 22:59:34 - INFO - [3b44f853-7199-409b-b9e5-bf43a71342ef] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:59:39 - INFO - [3b44f853-7199-409b-b9e5-bf43a71342ef] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:59:39 - INFO - [3b44f853-7199-409b-b9e5-bf43a71342ef] 30 frames saved to temp_videos/3b44f853-7199-409b-b9e5-bf43a71342ef +2025-08-18 22:59:39 - INFO - Prompt token length: 2276 +2025-08-18 22:59:48 - INFO - Tokens per second: 11.519826832960204, Peak GPU memory MB: 4498.375 +2025-08-18 22:59:48 - INFO - [3b44f853-7199-409b-b9e5-bf43a71342ef] Inference time: 14.11 seconds, CPU usage: 52.0%, CPU core utilization: [65.6, 37.9, 44.9, 59.6] +2025-08-18 22:59:48 - INFO - [3b44f853-7199-409b-b9e5-bf43a71342ef] Cleaned up temporary file: temp_videos/3b44f853-7199-409b-b9e5-bf43a71342ef.mp4 +2025-08-18 22:59:48 - INFO - [3b44f853-7199-409b-b9e5-bf43a71342ef] Cleaned up temporary frame directory: temp_videos/3b44f853-7199-409b-b9e5-bf43a71342ef +2025-08-18 22:59:48 - INFO - [e944425e-5ba9-4dd4-acdf-79f0dfe1ba09] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_049.mp4' +2025-08-18 22:59:48 - INFO - [e944425e-5ba9-4dd4-acdf-79f0dfe1ba09] Video saved to temporary file: temp_videos/e944425e-5ba9-4dd4-acdf-79f0dfe1ba09.mp4 +2025-08-18 22:59:48 - INFO - [e944425e-5ba9-4dd4-acdf-79f0dfe1ba09] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 22:59:53 - INFO - [e944425e-5ba9-4dd4-acdf-79f0dfe1ba09] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 22:59:53 - INFO - [e944425e-5ba9-4dd4-acdf-79f0dfe1ba09] 30 frames saved to temp_videos/e944425e-5ba9-4dd4-acdf-79f0dfe1ba09 +2025-08-18 22:59:53 - INFO - Prompt token length: 2276 +2025-08-18 23:00:00 - INFO - Tokens per second: 10.918856106314124, Peak GPU memory MB: 4498.375 +2025-08-18 23:00:00 - INFO - [e944425e-5ba9-4dd4-acdf-79f0dfe1ba09] Inference time: 12.41 seconds, CPU usage: 52.8%, CPU core utilization: [61.8, 51.0, 55.3, 43.1] +2025-08-18 23:00:00 - INFO - [e944425e-5ba9-4dd4-acdf-79f0dfe1ba09] Cleaned up temporary file: temp_videos/e944425e-5ba9-4dd4-acdf-79f0dfe1ba09.mp4 +2025-08-18 23:00:00 - INFO - [e944425e-5ba9-4dd4-acdf-79f0dfe1ba09] Cleaned up temporary frame directory: temp_videos/e944425e-5ba9-4dd4-acdf-79f0dfe1ba09 +2025-08-18 23:00:00 - INFO - [e071de33-f805-4530-8741-f62f3ad4bf46] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_050.mp4' +2025-08-18 23:00:00 - INFO - [e071de33-f805-4530-8741-f62f3ad4bf46] Video saved to temporary file: temp_videos/e071de33-f805-4530-8741-f62f3ad4bf46.mp4 +2025-08-18 23:00:01 - INFO - [e071de33-f805-4530-8741-f62f3ad4bf46] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:00:06 - INFO - [e071de33-f805-4530-8741-f62f3ad4bf46] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:00:06 - INFO - [e071de33-f805-4530-8741-f62f3ad4bf46] 30 frames saved to temp_videos/e071de33-f805-4530-8741-f62f3ad4bf46 +2025-08-18 23:00:06 - INFO - Prompt token length: 2276 +2025-08-18 23:00:14 - INFO - Tokens per second: 11.285178690107939, Peak GPU memory MB: 4498.375 +2025-08-18 23:00:14 - INFO - [e071de33-f805-4530-8741-f62f3ad4bf46] Inference time: 13.93 seconds, CPU usage: 52.8%, CPU core utilization: [44.2, 38.7, 37.1, 90.9] +2025-08-18 23:00:14 - INFO - [e071de33-f805-4530-8741-f62f3ad4bf46] Cleaned up temporary file: temp_videos/e071de33-f805-4530-8741-f62f3ad4bf46.mp4 +2025-08-18 23:00:14 - INFO - [e071de33-f805-4530-8741-f62f3ad4bf46] Cleaned up temporary frame directory: temp_videos/e071de33-f805-4530-8741-f62f3ad4bf46 +2025-08-18 23:00:14 - INFO - [86940c58-16cc-4771-b0ff-c1aa062432eb] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_051.mp4' +2025-08-18 23:00:14 - INFO - [86940c58-16cc-4771-b0ff-c1aa062432eb] Video saved to temporary file: temp_videos/86940c58-16cc-4771-b0ff-c1aa062432eb.mp4 +2025-08-18 23:00:14 - INFO - [86940c58-16cc-4771-b0ff-c1aa062432eb] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:00:20 - INFO - [86940c58-16cc-4771-b0ff-c1aa062432eb] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:00:20 - INFO - [86940c58-16cc-4771-b0ff-c1aa062432eb] 30 frames saved to temp_videos/86940c58-16cc-4771-b0ff-c1aa062432eb +2025-08-18 23:00:20 - INFO - Prompt token length: 2276 +2025-08-18 23:00:28 - INFO - Tokens per second: 10.676742482272044, Peak GPU memory MB: 4498.375 +2025-08-18 23:00:28 - INFO - [86940c58-16cc-4771-b0ff-c1aa062432eb] Inference time: 13.05 seconds, CPU usage: 55.0%, CPU core utilization: [40.6, 89.8, 44.2, 45.4] +2025-08-18 23:00:28 - INFO - [86940c58-16cc-4771-b0ff-c1aa062432eb] Cleaned up temporary file: temp_videos/86940c58-16cc-4771-b0ff-c1aa062432eb.mp4 +2025-08-18 23:00:28 - INFO - [86940c58-16cc-4771-b0ff-c1aa062432eb] Cleaned up temporary frame directory: temp_videos/86940c58-16cc-4771-b0ff-c1aa062432eb +2025-08-18 23:00:28 - INFO - [81b94ac7-cac5-457f-bd20-59fd15f12943] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_052.mp4' +2025-08-18 23:00:28 - INFO - [81b94ac7-cac5-457f-bd20-59fd15f12943] Video saved to temporary file: temp_videos/81b94ac7-cac5-457f-bd20-59fd15f12943.mp4 +2025-08-18 23:00:28 - INFO - [81b94ac7-cac5-457f-bd20-59fd15f12943] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:00:32 - INFO - [81b94ac7-cac5-457f-bd20-59fd15f12943] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:00:32 - INFO - [81b94ac7-cac5-457f-bd20-59fd15f12943] 30 frames saved to temp_videos/81b94ac7-cac5-457f-bd20-59fd15f12943 +2025-08-18 23:00:33 - INFO - Prompt token length: 2276 +2025-08-18 23:00:51 - INFO - Tokens per second: 13.622099739318918, Peak GPU memory MB: 4498.375 +2025-08-18 23:00:51 - INFO - [81b94ac7-cac5-457f-bd20-59fd15f12943] Inference time: 23.43 seconds, CPU usage: 42.2%, CPU core utilization: [31.3, 23.5, 85.9, 28.0] +2025-08-18 23:00:51 - INFO - [81b94ac7-cac5-457f-bd20-59fd15f12943] Cleaned up temporary file: temp_videos/81b94ac7-cac5-457f-bd20-59fd15f12943.mp4 +2025-08-18 23:00:51 - INFO - [81b94ac7-cac5-457f-bd20-59fd15f12943] Cleaned up temporary frame directory: temp_videos/81b94ac7-cac5-457f-bd20-59fd15f12943 +2025-08-18 23:00:51 - INFO - [efba522a-2522-43d4-a898-9dc8c3ed0e73] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_053.mp4' +2025-08-18 23:00:51 - INFO - [efba522a-2522-43d4-a898-9dc8c3ed0e73] Video saved to temporary file: temp_videos/efba522a-2522-43d4-a898-9dc8c3ed0e73.mp4 +2025-08-18 23:00:51 - INFO - [efba522a-2522-43d4-a898-9dc8c3ed0e73] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:00:56 - INFO - [efba522a-2522-43d4-a898-9dc8c3ed0e73] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:00:56 - INFO - [efba522a-2522-43d4-a898-9dc8c3ed0e73] 30 frames saved to temp_videos/efba522a-2522-43d4-a898-9dc8c3ed0e73 +2025-08-18 23:00:57 - INFO - Prompt token length: 2276 +2025-08-18 23:01:15 - INFO - Tokens per second: 13.347533319146407, Peak GPU memory MB: 4498.375 +2025-08-18 23:01:15 - INFO - [efba522a-2522-43d4-a898-9dc8c3ed0e73] Inference time: 24.23 seconds, CPU usage: 42.6%, CPU core utilization: [62.2, 26.4, 56.8, 25.2] +2025-08-18 23:01:15 - INFO - [efba522a-2522-43d4-a898-9dc8c3ed0e73] Cleaned up temporary file: temp_videos/efba522a-2522-43d4-a898-9dc8c3ed0e73.mp4 +2025-08-18 23:01:15 - INFO - [efba522a-2522-43d4-a898-9dc8c3ed0e73] Cleaned up temporary frame directory: temp_videos/efba522a-2522-43d4-a898-9dc8c3ed0e73 +2025-08-18 23:01:15 - INFO - [006b941e-077a-434b-9305-64ca8dafb004] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_054.mp4' +2025-08-18 23:01:15 - INFO - [006b941e-077a-434b-9305-64ca8dafb004] Video saved to temporary file: temp_videos/006b941e-077a-434b-9305-64ca8dafb004.mp4 +2025-08-18 23:01:15 - INFO - [006b941e-077a-434b-9305-64ca8dafb004] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:01:20 - INFO - [006b941e-077a-434b-9305-64ca8dafb004] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:01:20 - INFO - [006b941e-077a-434b-9305-64ca8dafb004] 30 frames saved to temp_videos/006b941e-077a-434b-9305-64ca8dafb004 +2025-08-18 23:01:21 - INFO - Prompt token length: 2276 +2025-08-18 23:01:28 - INFO - Tokens per second: 10.53785016721129, Peak GPU memory MB: 4498.375 +2025-08-18 23:01:28 - INFO - [006b941e-077a-434b-9305-64ca8dafb004] Inference time: 12.31 seconds, CPU usage: 55.8%, CPU core utilization: [55.0, 50.0, 77.9, 40.5] +2025-08-18 23:01:28 - INFO - [006b941e-077a-434b-9305-64ca8dafb004] Cleaned up temporary file: temp_videos/006b941e-077a-434b-9305-64ca8dafb004.mp4 +2025-08-18 23:01:28 - INFO - [006b941e-077a-434b-9305-64ca8dafb004] Cleaned up temporary frame directory: temp_videos/006b941e-077a-434b-9305-64ca8dafb004 +2025-08-18 23:01:28 - INFO - [d839ff80-6608-461e-ac9c-705a44262a2c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_055.mp4' +2025-08-18 23:01:28 - INFO - [d839ff80-6608-461e-ac9c-705a44262a2c] Video saved to temporary file: temp_videos/d839ff80-6608-461e-ac9c-705a44262a2c.mp4 +2025-08-18 23:01:28 - INFO - [d839ff80-6608-461e-ac9c-705a44262a2c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:01:33 - INFO - [d839ff80-6608-461e-ac9c-705a44262a2c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:01:33 - INFO - [d839ff80-6608-461e-ac9c-705a44262a2c] 30 frames saved to temp_videos/d839ff80-6608-461e-ac9c-705a44262a2c +2025-08-18 23:01:34 - INFO - Prompt token length: 2276 +2025-08-18 23:01:41 - INFO - Tokens per second: 10.75895843424124, Peak GPU memory MB: 4498.375 +2025-08-18 23:01:41 - INFO - [d839ff80-6608-461e-ac9c-705a44262a2c] Inference time: 13.70 seconds, CPU usage: 55.6%, CPU core utilization: [54.4, 41.3, 79.8, 46.6] +2025-08-18 23:01:41 - INFO - [d839ff80-6608-461e-ac9c-705a44262a2c] Cleaned up temporary file: temp_videos/d839ff80-6608-461e-ac9c-705a44262a2c.mp4 +2025-08-18 23:01:41 - INFO - [d839ff80-6608-461e-ac9c-705a44262a2c] Cleaned up temporary frame directory: temp_videos/d839ff80-6608-461e-ac9c-705a44262a2c +2025-08-18 23:01:41 - INFO - [0eff5449-6a26-4685-951e-2d234d38afcd] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_056.mp4' +2025-08-18 23:01:41 - INFO - [0eff5449-6a26-4685-951e-2d234d38afcd] Video saved to temporary file: temp_videos/0eff5449-6a26-4685-951e-2d234d38afcd.mp4 +2025-08-18 23:01:41 - INFO - [0eff5449-6a26-4685-951e-2d234d38afcd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:01:46 - INFO - [0eff5449-6a26-4685-951e-2d234d38afcd] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:01:47 - INFO - [0eff5449-6a26-4685-951e-2d234d38afcd] 30 frames saved to temp_videos/0eff5449-6a26-4685-951e-2d234d38afcd +2025-08-18 23:01:47 - INFO - Prompt token length: 2276 +2025-08-18 23:01:54 - INFO - Tokens per second: 10.594214965012135, Peak GPU memory MB: 4498.375 +2025-08-18 23:01:54 - INFO - [0eff5449-6a26-4685-951e-2d234d38afcd] Inference time: 12.68 seconds, CPU usage: 54.8%, CPU core utilization: [37.5, 42.0, 97.2, 42.3] +2025-08-18 23:01:54 - INFO - [0eff5449-6a26-4685-951e-2d234d38afcd] Cleaned up temporary file: temp_videos/0eff5449-6a26-4685-951e-2d234d38afcd.mp4 +2025-08-18 23:01:54 - INFO - [0eff5449-6a26-4685-951e-2d234d38afcd] Cleaned up temporary frame directory: temp_videos/0eff5449-6a26-4685-951e-2d234d38afcd +2025-08-18 23:01:54 - INFO - [49ff462d-dde1-4890-a70d-705c35a65ff5] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_057.mp4' +2025-08-18 23:01:54 - INFO - [49ff462d-dde1-4890-a70d-705c35a65ff5] Video saved to temporary file: temp_videos/49ff462d-dde1-4890-a70d-705c35a65ff5.mp4 +2025-08-18 23:01:54 - INFO - [49ff462d-dde1-4890-a70d-705c35a65ff5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:01:59 - INFO - [49ff462d-dde1-4890-a70d-705c35a65ff5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:02:00 - INFO - [49ff462d-dde1-4890-a70d-705c35a65ff5] 30 frames saved to temp_videos/49ff462d-dde1-4890-a70d-705c35a65ff5 +2025-08-18 23:02:00 - INFO - Prompt token length: 2276 +2025-08-18 23:02:08 - INFO - Tokens per second: 11.269221613888531, Peak GPU memory MB: 4498.375 +2025-08-18 23:02:08 - INFO - [49ff462d-dde1-4890-a70d-705c35a65ff5] Inference time: 14.13 seconds, CPU usage: 53.3%, CPU core utilization: [66.9, 41.2, 64.7, 40.3] +2025-08-18 23:02:08 - INFO - [49ff462d-dde1-4890-a70d-705c35a65ff5] Cleaned up temporary file: temp_videos/49ff462d-dde1-4890-a70d-705c35a65ff5.mp4 +2025-08-18 23:02:08 - INFO - [49ff462d-dde1-4890-a70d-705c35a65ff5] Cleaned up temporary frame directory: temp_videos/49ff462d-dde1-4890-a70d-705c35a65ff5 +2025-08-18 23:02:08 - INFO - [f4304769-b360-4c33-824f-6bf090832808] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_058.mp4' +2025-08-18 23:02:08 - INFO - [f4304769-b360-4c33-824f-6bf090832808] Video saved to temporary file: temp_videos/f4304769-b360-4c33-824f-6bf090832808.mp4 +2025-08-18 23:02:08 - INFO - [f4304769-b360-4c33-824f-6bf090832808] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:02:14 - INFO - [f4304769-b360-4c33-824f-6bf090832808] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:02:14 - INFO - [f4304769-b360-4c33-824f-6bf090832808] 30 frames saved to temp_videos/f4304769-b360-4c33-824f-6bf090832808 +2025-08-18 23:02:14 - INFO - Prompt token length: 2276 +2025-08-18 23:02:22 - INFO - Tokens per second: 11.04575295102091, Peak GPU memory MB: 4498.375 +2025-08-18 23:02:22 - INFO - [f4304769-b360-4c33-824f-6bf090832808] Inference time: 13.99 seconds, CPU usage: 55.2%, CPU core utilization: [42.7, 38.4, 42.4, 97.3] +2025-08-18 23:02:22 - INFO - [f4304769-b360-4c33-824f-6bf090832808] Cleaned up temporary file: temp_videos/f4304769-b360-4c33-824f-6bf090832808.mp4 +2025-08-18 23:02:22 - INFO - [f4304769-b360-4c33-824f-6bf090832808] Cleaned up temporary frame directory: temp_videos/f4304769-b360-4c33-824f-6bf090832808 +2025-08-18 23:02:22 - INFO - [d9c98683-0f98-4a8c-b7db-ecca0cecf48b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_059.mp4' +2025-08-18 23:02:22 - INFO - [d9c98683-0f98-4a8c-b7db-ecca0cecf48b] Video saved to temporary file: temp_videos/d9c98683-0f98-4a8c-b7db-ecca0cecf48b.mp4 +2025-08-18 23:02:22 - INFO - [d9c98683-0f98-4a8c-b7db-ecca0cecf48b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:02:28 - INFO - [d9c98683-0f98-4a8c-b7db-ecca0cecf48b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:02:28 - INFO - [d9c98683-0f98-4a8c-b7db-ecca0cecf48b] 30 frames saved to temp_videos/d9c98683-0f98-4a8c-b7db-ecca0cecf48b +2025-08-18 23:02:29 - INFO - Prompt token length: 2276 +2025-08-18 23:02:36 - INFO - Tokens per second: 10.727149599423456, Peak GPU memory MB: 4498.375 +2025-08-18 23:02:36 - INFO - [d9c98683-0f98-4a8c-b7db-ecca0cecf48b] Inference time: 13.41 seconds, CPU usage: 56.3%, CPU core utilization: [40.9, 50.8, 47.5, 86.2] +2025-08-18 23:02:36 - INFO - [d9c98683-0f98-4a8c-b7db-ecca0cecf48b] Cleaned up temporary file: temp_videos/d9c98683-0f98-4a8c-b7db-ecca0cecf48b.mp4 +2025-08-18 23:02:36 - INFO - [d9c98683-0f98-4a8c-b7db-ecca0cecf48b] Cleaned up temporary frame directory: temp_videos/d9c98683-0f98-4a8c-b7db-ecca0cecf48b +2025-08-18 23:02:36 - INFO - [c515e67f-f312-4e56-9f76-63baafea18fe] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_060.mp4' +2025-08-18 23:02:36 - INFO - [c515e67f-f312-4e56-9f76-63baafea18fe] Video saved to temporary file: temp_videos/c515e67f-f312-4e56-9f76-63baafea18fe.mp4 +2025-08-18 23:02:36 - INFO - [c515e67f-f312-4e56-9f76-63baafea18fe] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:02:41 - INFO - [c515e67f-f312-4e56-9f76-63baafea18fe] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:02:41 - INFO - [c515e67f-f312-4e56-9f76-63baafea18fe] 30 frames saved to temp_videos/c515e67f-f312-4e56-9f76-63baafea18fe +2025-08-18 23:02:42 - INFO - Prompt token length: 2276 +2025-08-18 23:02:48 - INFO - Tokens per second: 10.042450573700199, Peak GPU memory MB: 4498.375 +2025-08-18 23:02:48 - INFO - [c515e67f-f312-4e56-9f76-63baafea18fe] Inference time: 12.22 seconds, CPU usage: 56.6%, CPU core utilization: [43.7, 55.8, 45.1, 81.6] +2025-08-18 23:02:48 - INFO - [c515e67f-f312-4e56-9f76-63baafea18fe] Cleaned up temporary file: temp_videos/c515e67f-f312-4e56-9f76-63baafea18fe.mp4 +2025-08-18 23:02:48 - INFO - [c515e67f-f312-4e56-9f76-63baafea18fe] Cleaned up temporary frame directory: temp_videos/c515e67f-f312-4e56-9f76-63baafea18fe +2025-08-18 23:02:48 - INFO - [7ef4adca-42f0-441a-934e-7a89de474185] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_061.mp4' +2025-08-18 23:02:48 - INFO - [7ef4adca-42f0-441a-934e-7a89de474185] Video saved to temporary file: temp_videos/7ef4adca-42f0-441a-934e-7a89de474185.mp4 +2025-08-18 23:02:48 - INFO - [7ef4adca-42f0-441a-934e-7a89de474185] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:02:53 - INFO - [7ef4adca-42f0-441a-934e-7a89de474185] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:02:53 - INFO - [7ef4adca-42f0-441a-934e-7a89de474185] 30 frames saved to temp_videos/7ef4adca-42f0-441a-934e-7a89de474185 +2025-08-18 23:02:54 - INFO - Prompt token length: 2276 +2025-08-18 23:03:04 - INFO - Tokens per second: 11.836645817275972, Peak GPU memory MB: 4498.375 +2025-08-18 23:03:04 - INFO - [7ef4adca-42f0-441a-934e-7a89de474185] Inference time: 15.41 seconds, CPU usage: 50.4%, CPU core utilization: [76.2, 47.8, 45.2, 32.3] +2025-08-18 23:03:04 - INFO - [7ef4adca-42f0-441a-934e-7a89de474185] Cleaned up temporary file: temp_videos/7ef4adca-42f0-441a-934e-7a89de474185.mp4 +2025-08-18 23:03:04 - INFO - [7ef4adca-42f0-441a-934e-7a89de474185] Cleaned up temporary frame directory: temp_videos/7ef4adca-42f0-441a-934e-7a89de474185 +2025-08-18 23:03:04 - INFO - [1c90644f-f15e-4166-91ed-6cfd756f5345] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_062.mp4' +2025-08-18 23:03:04 - INFO - [1c90644f-f15e-4166-91ed-6cfd756f5345] Video saved to temporary file: temp_videos/1c90644f-f15e-4166-91ed-6cfd756f5345.mp4 +2025-08-18 23:03:04 - INFO - [1c90644f-f15e-4166-91ed-6cfd756f5345] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:03:09 - INFO - [1c90644f-f15e-4166-91ed-6cfd756f5345] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:03:09 - INFO - [1c90644f-f15e-4166-91ed-6cfd756f5345] 30 frames saved to temp_videos/1c90644f-f15e-4166-91ed-6cfd756f5345 +2025-08-18 23:03:10 - INFO - Prompt token length: 2276 +2025-08-18 23:03:28 - INFO - Tokens per second: 13.51890528957401, Peak GPU memory MB: 4498.375 +2025-08-18 23:03:28 - INFO - [1c90644f-f15e-4166-91ed-6cfd756f5345] Inference time: 24.69 seconds, CPU usage: 43.4%, CPU core utilization: [66.1, 27.7, 50.8, 28.9] +2025-08-18 23:03:28 - INFO - [1c90644f-f15e-4166-91ed-6cfd756f5345] Cleaned up temporary file: temp_videos/1c90644f-f15e-4166-91ed-6cfd756f5345.mp4 +2025-08-18 23:03:28 - INFO - [1c90644f-f15e-4166-91ed-6cfd756f5345] Cleaned up temporary frame directory: temp_videos/1c90644f-f15e-4166-91ed-6cfd756f5345 +2025-08-18 23:03:28 - INFO - [8ecf1c91-baa7-4ad3-a045-af2055b234f0] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_063.mp4' +2025-08-18 23:03:28 - INFO - [8ecf1c91-baa7-4ad3-a045-af2055b234f0] Video saved to temporary file: temp_videos/8ecf1c91-baa7-4ad3-a045-af2055b234f0.mp4 +2025-08-18 23:03:28 - INFO - [8ecf1c91-baa7-4ad3-a045-af2055b234f0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:03:34 - INFO - [8ecf1c91-baa7-4ad3-a045-af2055b234f0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:03:34 - INFO - [8ecf1c91-baa7-4ad3-a045-af2055b234f0] 30 frames saved to temp_videos/8ecf1c91-baa7-4ad3-a045-af2055b234f0 +2025-08-18 23:03:35 - INFO - Prompt token length: 2276 +2025-08-18 23:03:41 - INFO - Tokens per second: 10.470370255459905, Peak GPU memory MB: 4498.375 +2025-08-18 23:03:41 - INFO - [8ecf1c91-baa7-4ad3-a045-af2055b234f0] Inference time: 12.99 seconds, CPU usage: 57.5%, CPU core utilization: [89.5, 46.0, 50.2, 44.2] +2025-08-18 23:03:41 - INFO - [8ecf1c91-baa7-4ad3-a045-af2055b234f0] Cleaned up temporary file: temp_videos/8ecf1c91-baa7-4ad3-a045-af2055b234f0.mp4 +2025-08-18 23:03:41 - INFO - [8ecf1c91-baa7-4ad3-a045-af2055b234f0] Cleaned up temporary frame directory: temp_videos/8ecf1c91-baa7-4ad3-a045-af2055b234f0 +2025-08-18 23:03:41 - INFO - [768cf2db-998a-430b-a18c-cee0ce86f57a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_064.mp4' +2025-08-18 23:03:41 - INFO - [768cf2db-998a-430b-a18c-cee0ce86f57a] Video saved to temporary file: temp_videos/768cf2db-998a-430b-a18c-cee0ce86f57a.mp4 +2025-08-18 23:03:41 - INFO - [768cf2db-998a-430b-a18c-cee0ce86f57a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:03:47 - INFO - [768cf2db-998a-430b-a18c-cee0ce86f57a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:03:47 - INFO - [768cf2db-998a-430b-a18c-cee0ce86f57a] 30 frames saved to temp_videos/768cf2db-998a-430b-a18c-cee0ce86f57a +2025-08-18 23:03:47 - INFO - Prompt token length: 2276 +2025-08-18 23:03:54 - INFO - Tokens per second: 10.637653036938566, Peak GPU memory MB: 4498.375 +2025-08-18 23:03:54 - INFO - [768cf2db-998a-430b-a18c-cee0ce86f57a] Inference time: 13.02 seconds, CPU usage: 54.6%, CPU core utilization: [60.0, 80.6, 40.0, 37.7] +2025-08-18 23:03:54 - INFO - [768cf2db-998a-430b-a18c-cee0ce86f57a] Cleaned up temporary file: temp_videos/768cf2db-998a-430b-a18c-cee0ce86f57a.mp4 +2025-08-18 23:03:54 - INFO - [768cf2db-998a-430b-a18c-cee0ce86f57a] Cleaned up temporary frame directory: temp_videos/768cf2db-998a-430b-a18c-cee0ce86f57a +2025-08-18 23:03:54 - INFO - [aed264f9-f2d0-41ea-978b-dfa6b229d55c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_065.mp4' +2025-08-18 23:03:54 - INFO - [aed264f9-f2d0-41ea-978b-dfa6b229d55c] Video saved to temporary file: temp_videos/aed264f9-f2d0-41ea-978b-dfa6b229d55c.mp4 +2025-08-18 23:03:54 - INFO - [aed264f9-f2d0-41ea-978b-dfa6b229d55c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:03:59 - INFO - [aed264f9-f2d0-41ea-978b-dfa6b229d55c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:03:59 - INFO - [aed264f9-f2d0-41ea-978b-dfa6b229d55c] 30 frames saved to temp_videos/aed264f9-f2d0-41ea-978b-dfa6b229d55c +2025-08-18 23:04:00 - INFO - Prompt token length: 2276 +2025-08-18 23:04:08 - INFO - Tokens per second: 11.118798321181488, Peak GPU memory MB: 4498.375 +2025-08-18 23:04:08 - INFO - [aed264f9-f2d0-41ea-978b-dfa6b229d55c] Inference time: 13.33 seconds, CPU usage: 53.2%, CPU core utilization: [36.3, 42.5, 92.8, 41.2] +2025-08-18 23:04:08 - INFO - [aed264f9-f2d0-41ea-978b-dfa6b229d55c] Cleaned up temporary file: temp_videos/aed264f9-f2d0-41ea-978b-dfa6b229d55c.mp4 +2025-08-18 23:04:08 - INFO - [aed264f9-f2d0-41ea-978b-dfa6b229d55c] Cleaned up temporary frame directory: temp_videos/aed264f9-f2d0-41ea-978b-dfa6b229d55c +2025-08-18 23:04:08 - INFO - [1c61368d-28f5-4388-8f15-59e58db86810] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_066.mp4' +2025-08-18 23:04:08 - INFO - [1c61368d-28f5-4388-8f15-59e58db86810] Video saved to temporary file: temp_videos/1c61368d-28f5-4388-8f15-59e58db86810.mp4 +2025-08-18 23:04:08 - INFO - [1c61368d-28f5-4388-8f15-59e58db86810] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:04:13 - INFO - [1c61368d-28f5-4388-8f15-59e58db86810] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:04:13 - INFO - [1c61368d-28f5-4388-8f15-59e58db86810] 30 frames saved to temp_videos/1c61368d-28f5-4388-8f15-59e58db86810 +2025-08-18 23:04:14 - INFO - Prompt token length: 2276 +2025-08-18 23:04:21 - INFO - Tokens per second: 11.046646657452769, Peak GPU memory MB: 4498.375 +2025-08-18 23:04:21 - INFO - [1c61368d-28f5-4388-8f15-59e58db86810] Inference time: 13.33 seconds, CPU usage: 54.8%, CPU core utilization: [41.8, 40.8, 92.8, 43.8] +2025-08-18 23:04:21 - INFO - [1c61368d-28f5-4388-8f15-59e58db86810] Cleaned up temporary file: temp_videos/1c61368d-28f5-4388-8f15-59e58db86810.mp4 +2025-08-18 23:04:21 - INFO - [1c61368d-28f5-4388-8f15-59e58db86810] Cleaned up temporary frame directory: temp_videos/1c61368d-28f5-4388-8f15-59e58db86810 +2025-08-18 23:04:21 - INFO - [e64444fd-c006-4d5f-ac89-6783a3a4f3db] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_067.mp4' +2025-08-18 23:04:21 - INFO - [e64444fd-c006-4d5f-ac89-6783a3a4f3db] Video saved to temporary file: temp_videos/e64444fd-c006-4d5f-ac89-6783a3a4f3db.mp4 +2025-08-18 23:04:21 - INFO - [e64444fd-c006-4d5f-ac89-6783a3a4f3db] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:04:26 - INFO - [e64444fd-c006-4d5f-ac89-6783a3a4f3db] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:04:26 - INFO - [e64444fd-c006-4d5f-ac89-6783a3a4f3db] 30 frames saved to temp_videos/e64444fd-c006-4d5f-ac89-6783a3a4f3db +2025-08-18 23:04:27 - INFO - Prompt token length: 2276 +2025-08-18 23:04:37 - INFO - Tokens per second: 12.171039547079923, Peak GPU memory MB: 4498.375 +2025-08-18 23:04:37 - INFO - [e64444fd-c006-4d5f-ac89-6783a3a4f3db] Inference time: 15.97 seconds, CPU usage: 47.4%, CPU core utilization: [41.1, 39.5, 76.6, 32.2] +2025-08-18 23:04:37 - INFO - [e64444fd-c006-4d5f-ac89-6783a3a4f3db] Cleaned up temporary file: temp_videos/e64444fd-c006-4d5f-ac89-6783a3a4f3db.mp4 +2025-08-18 23:04:37 - INFO - [e64444fd-c006-4d5f-ac89-6783a3a4f3db] Cleaned up temporary frame directory: temp_videos/e64444fd-c006-4d5f-ac89-6783a3a4f3db +2025-08-18 23:04:37 - INFO - [cc94a1b4-298e-4960-aaaf-9c6f37c66226] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_068.mp4' +2025-08-18 23:04:37 - INFO - [cc94a1b4-298e-4960-aaaf-9c6f37c66226] Video saved to temporary file: temp_videos/cc94a1b4-298e-4960-aaaf-9c6f37c66226.mp4 +2025-08-18 23:04:37 - INFO - [cc94a1b4-298e-4960-aaaf-9c6f37c66226] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:04:42 - INFO - [cc94a1b4-298e-4960-aaaf-9c6f37c66226] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:04:42 - INFO - [cc94a1b4-298e-4960-aaaf-9c6f37c66226] 30 frames saved to temp_videos/cc94a1b4-298e-4960-aaaf-9c6f37c66226 +2025-08-18 23:04:43 - INFO - Prompt token length: 2276 +2025-08-18 23:04:52 - INFO - Tokens per second: 11.776910119364178, Peak GPU memory MB: 4498.375 +2025-08-18 23:04:52 - INFO - [cc94a1b4-298e-4960-aaaf-9c6f37c66226] Inference time: 15.09 seconds, CPU usage: 50.9%, CPU core utilization: [43.8, 38.1, 85.2, 36.4] +2025-08-18 23:04:52 - INFO - [cc94a1b4-298e-4960-aaaf-9c6f37c66226] Cleaned up temporary file: temp_videos/cc94a1b4-298e-4960-aaaf-9c6f37c66226.mp4 +2025-08-18 23:04:52 - INFO - [cc94a1b4-298e-4960-aaaf-9c6f37c66226] Cleaned up temporary frame directory: temp_videos/cc94a1b4-298e-4960-aaaf-9c6f37c66226 +2025-08-18 23:04:52 - INFO - [be6ef8ed-4bde-40d6-a538-7b33950ca231] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_069.mp4' +2025-08-18 23:04:52 - INFO - [be6ef8ed-4bde-40d6-a538-7b33950ca231] Video saved to temporary file: temp_videos/be6ef8ed-4bde-40d6-a538-7b33950ca231.mp4 +2025-08-18 23:04:52 - INFO - [be6ef8ed-4bde-40d6-a538-7b33950ca231] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:04:58 - INFO - [be6ef8ed-4bde-40d6-a538-7b33950ca231] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:04:58 - INFO - [be6ef8ed-4bde-40d6-a538-7b33950ca231] 30 frames saved to temp_videos/be6ef8ed-4bde-40d6-a538-7b33950ca231 +2025-08-18 23:04:59 - INFO - Prompt token length: 2276 +2025-08-18 23:05:18 - INFO - Tokens per second: 13.303425856169165, Peak GPU memory MB: 4498.375 +2025-08-18 23:05:18 - INFO - [be6ef8ed-4bde-40d6-a538-7b33950ca231] Inference time: 25.32 seconds, CPU usage: 44.9%, CPU core utilization: [28.9, 80.1, 28.4, 42.2] +2025-08-18 23:05:18 - INFO - [be6ef8ed-4bde-40d6-a538-7b33950ca231] Cleaned up temporary file: temp_videos/be6ef8ed-4bde-40d6-a538-7b33950ca231.mp4 +2025-08-18 23:05:18 - INFO - [be6ef8ed-4bde-40d6-a538-7b33950ca231] Cleaned up temporary frame directory: temp_videos/be6ef8ed-4bde-40d6-a538-7b33950ca231 +2025-08-18 23:05:18 - INFO - [ae135513-64b0-4bed-a0a6-d3cf52df3956] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_070.mp4' +2025-08-18 23:05:18 - INFO - [ae135513-64b0-4bed-a0a6-d3cf52df3956] Video saved to temporary file: temp_videos/ae135513-64b0-4bed-a0a6-d3cf52df3956.mp4 +2025-08-18 23:05:18 - INFO - [ae135513-64b0-4bed-a0a6-d3cf52df3956] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:05:24 - INFO - [ae135513-64b0-4bed-a0a6-d3cf52df3956] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:05:24 - INFO - [ae135513-64b0-4bed-a0a6-d3cf52df3956] 30 frames saved to temp_videos/ae135513-64b0-4bed-a0a6-d3cf52df3956 +2025-08-18 23:05:24 - INFO - Prompt token length: 2276 +2025-08-18 23:05:35 - INFO - Tokens per second: 12.13954557764283, Peak GPU memory MB: 4498.375 +2025-08-18 23:05:35 - INFO - [ae135513-64b0-4bed-a0a6-d3cf52df3956] Inference time: 17.38 seconds, CPU usage: 50.4%, CPU core utilization: [39.7, 58.8, 34.9, 68.0] +2025-08-18 23:05:35 - INFO - [ae135513-64b0-4bed-a0a6-d3cf52df3956] Cleaned up temporary file: temp_videos/ae135513-64b0-4bed-a0a6-d3cf52df3956.mp4 +2025-08-18 23:05:35 - INFO - [ae135513-64b0-4bed-a0a6-d3cf52df3956] Cleaned up temporary frame directory: temp_videos/ae135513-64b0-4bed-a0a6-d3cf52df3956 +2025-08-18 23:05:35 - INFO - [16b10795-6ef8-4c60-bd25-b4d9be61a39a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_071.mp4' +2025-08-18 23:05:35 - INFO - [16b10795-6ef8-4c60-bd25-b4d9be61a39a] Video saved to temporary file: temp_videos/16b10795-6ef8-4c60-bd25-b4d9be61a39a.mp4 +2025-08-18 23:05:35 - INFO - [16b10795-6ef8-4c60-bd25-b4d9be61a39a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:05:40 - INFO - [16b10795-6ef8-4c60-bd25-b4d9be61a39a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:05:40 - INFO - [16b10795-6ef8-4c60-bd25-b4d9be61a39a] 30 frames saved to temp_videos/16b10795-6ef8-4c60-bd25-b4d9be61a39a +2025-08-18 23:05:40 - INFO - Prompt token length: 2276 +2025-08-18 23:05:47 - INFO - Tokens per second: 10.414739116699462, Peak GPU memory MB: 4498.375 +2025-08-18 23:05:47 - INFO - [16b10795-6ef8-4c60-bd25-b4d9be61a39a] Inference time: 11.88 seconds, CPU usage: 55.1%, CPU core utilization: [92.1, 46.3, 42.3, 39.7] +2025-08-18 23:05:47 - INFO - [16b10795-6ef8-4c60-bd25-b4d9be61a39a] Cleaned up temporary file: temp_videos/16b10795-6ef8-4c60-bd25-b4d9be61a39a.mp4 +2025-08-18 23:05:47 - INFO - [16b10795-6ef8-4c60-bd25-b4d9be61a39a] Cleaned up temporary frame directory: temp_videos/16b10795-6ef8-4c60-bd25-b4d9be61a39a +2025-08-18 23:05:47 - INFO - [5ce48b47-57b9-4a49-83be-c25ac282ed59] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_072.mp4' +2025-08-18 23:05:47 - INFO - [5ce48b47-57b9-4a49-83be-c25ac282ed59] Video saved to temporary file: temp_videos/5ce48b47-57b9-4a49-83be-c25ac282ed59.mp4 +2025-08-18 23:05:47 - INFO - [5ce48b47-57b9-4a49-83be-c25ac282ed59] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:05:52 - INFO - [5ce48b47-57b9-4a49-83be-c25ac282ed59] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:05:52 - INFO - [5ce48b47-57b9-4a49-83be-c25ac282ed59] 30 frames saved to temp_videos/5ce48b47-57b9-4a49-83be-c25ac282ed59 +2025-08-18 23:05:52 - INFO - Prompt token length: 2276 +2025-08-18 23:06:00 - INFO - Tokens per second: 10.734430101804195, Peak GPU memory MB: 4498.375 +2025-08-18 23:06:00 - INFO - [5ce48b47-57b9-4a49-83be-c25ac282ed59] Inference time: 12.57 seconds, CPU usage: 52.9%, CPU core utilization: [40.4, 90.9, 42.6, 37.6] +2025-08-18 23:06:00 - INFO - [5ce48b47-57b9-4a49-83be-c25ac282ed59] Cleaned up temporary file: temp_videos/5ce48b47-57b9-4a49-83be-c25ac282ed59.mp4 +2025-08-18 23:06:00 - INFO - [5ce48b47-57b9-4a49-83be-c25ac282ed59] Cleaned up temporary frame directory: temp_videos/5ce48b47-57b9-4a49-83be-c25ac282ed59 +2025-08-18 23:06:00 - INFO - [3d074499-8223-438b-b0fd-0226dd497cab] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_073.mp4' +2025-08-18 23:06:00 - INFO - [3d074499-8223-438b-b0fd-0226dd497cab] Video saved to temporary file: temp_videos/3d074499-8223-438b-b0fd-0226dd497cab.mp4 +2025-08-18 23:06:00 - INFO - [3d074499-8223-438b-b0fd-0226dd497cab] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:06:04 - INFO - [3d074499-8223-438b-b0fd-0226dd497cab] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:06:05 - INFO - [3d074499-8223-438b-b0fd-0226dd497cab] 30 frames saved to temp_videos/3d074499-8223-438b-b0fd-0226dd497cab +2025-08-18 23:06:05 - INFO - Prompt token length: 2276 +2025-08-18 23:06:14 - INFO - Tokens per second: 11.401038439752732, Peak GPU memory MB: 4498.375 +2025-08-18 23:06:14 - INFO - [3d074499-8223-438b-b0fd-0226dd497cab] Inference time: 13.87 seconds, CPU usage: 52.1%, CPU core utilization: [86.9, 42.0, 40.8, 38.7] +2025-08-18 23:06:14 - INFO - [3d074499-8223-438b-b0fd-0226dd497cab] Cleaned up temporary file: temp_videos/3d074499-8223-438b-b0fd-0226dd497cab.mp4 +2025-08-18 23:06:14 - INFO - [3d074499-8223-438b-b0fd-0226dd497cab] Cleaned up temporary frame directory: temp_videos/3d074499-8223-438b-b0fd-0226dd497cab +2025-08-18 23:06:14 - INFO - [1cbc8704-d012-43d5-82c2-4e7078b12884] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_074.mp4' +2025-08-18 23:06:14 - INFO - [1cbc8704-d012-43d5-82c2-4e7078b12884] Video saved to temporary file: temp_videos/1cbc8704-d012-43d5-82c2-4e7078b12884.mp4 +2025-08-18 23:06:14 - INFO - [1cbc8704-d012-43d5-82c2-4e7078b12884] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:06:19 - INFO - [1cbc8704-d012-43d5-82c2-4e7078b12884] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:06:19 - INFO - [1cbc8704-d012-43d5-82c2-4e7078b12884] 30 frames saved to temp_videos/1cbc8704-d012-43d5-82c2-4e7078b12884 +2025-08-18 23:06:19 - INFO - Prompt token length: 2276 +2025-08-18 23:06:28 - INFO - Tokens per second: 11.397912573524183, Peak GPU memory MB: 4498.375 +2025-08-18 23:06:28 - INFO - [1cbc8704-d012-43d5-82c2-4e7078b12884] Inference time: 14.49 seconds, CPU usage: 52.0%, CPU core utilization: [38.6, 45.3, 37.2, 87.1] +2025-08-18 23:06:28 - INFO - [1cbc8704-d012-43d5-82c2-4e7078b12884] Cleaned up temporary file: temp_videos/1cbc8704-d012-43d5-82c2-4e7078b12884.mp4 +2025-08-18 23:06:28 - INFO - [1cbc8704-d012-43d5-82c2-4e7078b12884] Cleaned up temporary frame directory: temp_videos/1cbc8704-d012-43d5-82c2-4e7078b12884 +2025-08-18 23:06:28 - INFO - [a8d8e6d3-2b55-4de6-bda2-7a1926e9c6b1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_075.mp4' +2025-08-18 23:06:28 - INFO - [a8d8e6d3-2b55-4de6-bda2-7a1926e9c6b1] Video saved to temporary file: temp_videos/a8d8e6d3-2b55-4de6-bda2-7a1926e9c6b1.mp4 +2025-08-18 23:06:28 - INFO - [a8d8e6d3-2b55-4de6-bda2-7a1926e9c6b1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:06:33 - INFO - [a8d8e6d3-2b55-4de6-bda2-7a1926e9c6b1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:06:33 - INFO - [a8d8e6d3-2b55-4de6-bda2-7a1926e9c6b1] 30 frames saved to temp_videos/a8d8e6d3-2b55-4de6-bda2-7a1926e9c6b1 +2025-08-18 23:06:33 - INFO - Prompt token length: 2276 +2025-08-18 23:06:39 - INFO - Tokens per second: 9.434509967080192, Peak GPU memory MB: 4498.375 +2025-08-18 23:06:39 - INFO - [a8d8e6d3-2b55-4de6-bda2-7a1926e9c6b1] Inference time: 10.79 seconds, CPU usage: 56.6%, CPU core utilization: [45.3, 62.0, 53.9, 65.1] +2025-08-18 23:06:39 - INFO - [a8d8e6d3-2b55-4de6-bda2-7a1926e9c6b1] Cleaned up temporary file: temp_videos/a8d8e6d3-2b55-4de6-bda2-7a1926e9c6b1.mp4 +2025-08-18 23:06:39 - INFO - [a8d8e6d3-2b55-4de6-bda2-7a1926e9c6b1] Cleaned up temporary frame directory: temp_videos/a8d8e6d3-2b55-4de6-bda2-7a1926e9c6b1 +2025-08-18 23:06:39 - INFO - [7be9c3e4-53a9-4702-a5d8-29fbdc22247a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_076.mp4' +2025-08-18 23:06:39 - INFO - [7be9c3e4-53a9-4702-a5d8-29fbdc22247a] Video saved to temporary file: temp_videos/7be9c3e4-53a9-4702-a5d8-29fbdc22247a.mp4 +2025-08-18 23:06:39 - INFO - [7be9c3e4-53a9-4702-a5d8-29fbdc22247a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:06:44 - INFO - [7be9c3e4-53a9-4702-a5d8-29fbdc22247a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:06:44 - INFO - [7be9c3e4-53a9-4702-a5d8-29fbdc22247a] 30 frames saved to temp_videos/7be9c3e4-53a9-4702-a5d8-29fbdc22247a +2025-08-18 23:06:44 - INFO - Prompt token length: 2276 +2025-08-18 23:06:52 - INFO - Tokens per second: 10.855564263704968, Peak GPU memory MB: 4498.375 +2025-08-18 23:06:52 - INFO - [7be9c3e4-53a9-4702-a5d8-29fbdc22247a] Inference time: 12.64 seconds, CPU usage: 53.3%, CPU core utilization: [38.8, 70.7, 42.6, 61.1] +2025-08-18 23:06:52 - INFO - [7be9c3e4-53a9-4702-a5d8-29fbdc22247a] Cleaned up temporary file: temp_videos/7be9c3e4-53a9-4702-a5d8-29fbdc22247a.mp4 +2025-08-18 23:06:52 - INFO - [7be9c3e4-53a9-4702-a5d8-29fbdc22247a] Cleaned up temporary frame directory: temp_videos/7be9c3e4-53a9-4702-a5d8-29fbdc22247a +2025-08-18 23:06:52 - INFO - [6451a051-dd9d-46e6-abf2-66b46b1b503a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_077.mp4' +2025-08-18 23:06:52 - INFO - [6451a051-dd9d-46e6-abf2-66b46b1b503a] Video saved to temporary file: temp_videos/6451a051-dd9d-46e6-abf2-66b46b1b503a.mp4 +2025-08-18 23:06:52 - INFO - [6451a051-dd9d-46e6-abf2-66b46b1b503a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:06:57 - INFO - [6451a051-dd9d-46e6-abf2-66b46b1b503a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:06:57 - INFO - [6451a051-dd9d-46e6-abf2-66b46b1b503a] 30 frames saved to temp_videos/6451a051-dd9d-46e6-abf2-66b46b1b503a +2025-08-18 23:06:58 - INFO - Prompt token length: 2276 +2025-08-18 23:07:06 - INFO - Tokens per second: 11.264292443159409, Peak GPU memory MB: 4498.375 +2025-08-18 23:07:06 - INFO - [6451a051-dd9d-46e6-abf2-66b46b1b503a] Inference time: 13.96 seconds, CPU usage: 53.6%, CPU core utilization: [70.2, 43.9, 59.9, 40.2] +2025-08-18 23:07:06 - INFO - [6451a051-dd9d-46e6-abf2-66b46b1b503a] Cleaned up temporary file: temp_videos/6451a051-dd9d-46e6-abf2-66b46b1b503a.mp4 +2025-08-18 23:07:06 - INFO - [6451a051-dd9d-46e6-abf2-66b46b1b503a] Cleaned up temporary frame directory: temp_videos/6451a051-dd9d-46e6-abf2-66b46b1b503a +2025-08-18 23:07:06 - INFO - [87ac276a-a3ea-4d34-91eb-4a24e4b0988b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_078.mp4' +2025-08-18 23:07:06 - INFO - [87ac276a-a3ea-4d34-91eb-4a24e4b0988b] Video saved to temporary file: temp_videos/87ac276a-a3ea-4d34-91eb-4a24e4b0988b.mp4 +2025-08-18 23:07:06 - INFO - [87ac276a-a3ea-4d34-91eb-4a24e4b0988b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:07:11 - INFO - [87ac276a-a3ea-4d34-91eb-4a24e4b0988b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:07:11 - INFO - [87ac276a-a3ea-4d34-91eb-4a24e4b0988b] 30 frames saved to temp_videos/87ac276a-a3ea-4d34-91eb-4a24e4b0988b +2025-08-18 23:07:11 - INFO - Prompt token length: 2276 +2025-08-18 23:07:20 - INFO - Tokens per second: 11.433634043968842, Peak GPU memory MB: 4498.375 +2025-08-18 23:07:20 - INFO - [87ac276a-a3ea-4d34-91eb-4a24e4b0988b] Inference time: 13.98 seconds, CPU usage: 52.6%, CPU core utilization: [58.9, 63.0, 48.2, 40.3] +2025-08-18 23:07:20 - INFO - [87ac276a-a3ea-4d34-91eb-4a24e4b0988b] Cleaned up temporary file: temp_videos/87ac276a-a3ea-4d34-91eb-4a24e4b0988b.mp4 +2025-08-18 23:07:20 - INFO - [87ac276a-a3ea-4d34-91eb-4a24e4b0988b] Cleaned up temporary frame directory: temp_videos/87ac276a-a3ea-4d34-91eb-4a24e4b0988b +2025-08-18 23:07:20 - INFO - [eeff6f88-bdbc-42c4-af10-eadd111abd74] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_079.mp4' +2025-08-18 23:07:20 - INFO - [eeff6f88-bdbc-42c4-af10-eadd111abd74] Video saved to temporary file: temp_videos/eeff6f88-bdbc-42c4-af10-eadd111abd74.mp4 +2025-08-18 23:07:20 - INFO - [eeff6f88-bdbc-42c4-af10-eadd111abd74] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:07:26 - INFO - [eeff6f88-bdbc-42c4-af10-eadd111abd74] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:07:26 - INFO - [eeff6f88-bdbc-42c4-af10-eadd111abd74] 30 frames saved to temp_videos/eeff6f88-bdbc-42c4-af10-eadd111abd74 +2025-08-18 23:07:26 - INFO - Prompt token length: 2276 +2025-08-18 23:07:33 - INFO - Tokens per second: 10.660925176180571, Peak GPU memory MB: 4498.375 +2025-08-18 23:07:33 - INFO - [eeff6f88-bdbc-42c4-af10-eadd111abd74] Inference time: 13.68 seconds, CPU usage: 57.1%, CPU core utilization: [44.6, 54.0, 67.4, 62.7] +2025-08-18 23:07:33 - INFO - [eeff6f88-bdbc-42c4-af10-eadd111abd74] Cleaned up temporary file: temp_videos/eeff6f88-bdbc-42c4-af10-eadd111abd74.mp4 +2025-08-18 23:07:33 - INFO - [eeff6f88-bdbc-42c4-af10-eadd111abd74] Cleaned up temporary frame directory: temp_videos/eeff6f88-bdbc-42c4-af10-eadd111abd74 +2025-08-18 23:07:33 - INFO - [f5e44a0b-9490-41da-8fad-541a5189ddc2] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_080.mp4' +2025-08-18 23:07:33 - INFO - [f5e44a0b-9490-41da-8fad-541a5189ddc2] Video saved to temporary file: temp_videos/f5e44a0b-9490-41da-8fad-541a5189ddc2.mp4 +2025-08-18 23:07:33 - INFO - [f5e44a0b-9490-41da-8fad-541a5189ddc2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:07:40 - INFO - [f5e44a0b-9490-41da-8fad-541a5189ddc2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:07:40 - INFO - [f5e44a0b-9490-41da-8fad-541a5189ddc2] 30 frames saved to temp_videos/f5e44a0b-9490-41da-8fad-541a5189ddc2 +2025-08-18 23:07:40 - INFO - Prompt token length: 2276 +2025-08-18 23:07:59 - INFO - Tokens per second: 13.453729274975233, Peak GPU memory MB: 4498.375 +2025-08-18 23:07:59 - INFO - [f5e44a0b-9490-41da-8fad-541a5189ddc2] Inference time: 25.35 seconds, CPU usage: 45.7%, CPU core utilization: [26.3, 30.9, 94.7, 30.6] +2025-08-18 23:07:59 - INFO - [f5e44a0b-9490-41da-8fad-541a5189ddc2] Cleaned up temporary file: temp_videos/f5e44a0b-9490-41da-8fad-541a5189ddc2.mp4 +2025-08-18 23:07:59 - INFO - [f5e44a0b-9490-41da-8fad-541a5189ddc2] Cleaned up temporary frame directory: temp_videos/f5e44a0b-9490-41da-8fad-541a5189ddc2 +2025-08-18 23:07:59 - INFO - [7adc3909-1397-4a36-b2dd-edd998aae077] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_081.mp4' +2025-08-18 23:07:59 - INFO - [7adc3909-1397-4a36-b2dd-edd998aae077] Video saved to temporary file: temp_videos/7adc3909-1397-4a36-b2dd-edd998aae077.mp4 +2025-08-18 23:07:59 - INFO - [7adc3909-1397-4a36-b2dd-edd998aae077] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:08:04 - INFO - [7adc3909-1397-4a36-b2dd-edd998aae077] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:08:04 - INFO - [7adc3909-1397-4a36-b2dd-edd998aae077] 30 frames saved to temp_videos/7adc3909-1397-4a36-b2dd-edd998aae077 +2025-08-18 23:08:04 - INFO - Prompt token length: 2276 +2025-08-18 23:08:23 - INFO - Tokens per second: 13.213590696537448, Peak GPU memory MB: 4498.375 +2025-08-18 23:08:23 - INFO - [7adc3909-1397-4a36-b2dd-edd998aae077] Inference time: 24.29 seconds, CPU usage: 42.8%, CPU core utilization: [28.2, 39.4, 23.7, 79.7] +2025-08-18 23:08:23 - INFO - [7adc3909-1397-4a36-b2dd-edd998aae077] Cleaned up temporary file: temp_videos/7adc3909-1397-4a36-b2dd-edd998aae077.mp4 +2025-08-18 23:08:23 - INFO - [7adc3909-1397-4a36-b2dd-edd998aae077] Cleaned up temporary frame directory: temp_videos/7adc3909-1397-4a36-b2dd-edd998aae077 +2025-08-18 23:08:23 - INFO - [4b02a42f-ea2a-48f9-982a-52300d9dd936] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_082.mp4' +2025-08-18 23:08:23 - INFO - [4b02a42f-ea2a-48f9-982a-52300d9dd936] Video saved to temporary file: temp_videos/4b02a42f-ea2a-48f9-982a-52300d9dd936.mp4 +2025-08-18 23:08:23 - INFO - [4b02a42f-ea2a-48f9-982a-52300d9dd936] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:08:28 - INFO - [4b02a42f-ea2a-48f9-982a-52300d9dd936] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:08:28 - INFO - [4b02a42f-ea2a-48f9-982a-52300d9dd936] 30 frames saved to temp_videos/4b02a42f-ea2a-48f9-982a-52300d9dd936 +2025-08-18 23:08:29 - INFO - Prompt token length: 2276 +2025-08-18 23:08:37 - INFO - Tokens per second: 11.255230746577219, Peak GPU memory MB: 4498.375 +2025-08-18 23:08:37 - INFO - [4b02a42f-ea2a-48f9-982a-52300d9dd936] Inference time: 13.97 seconds, CPU usage: 51.9%, CPU core utilization: [37.6, 34.3, 42.3, 93.3] +2025-08-18 23:08:37 - INFO - [4b02a42f-ea2a-48f9-982a-52300d9dd936] Cleaned up temporary file: temp_videos/4b02a42f-ea2a-48f9-982a-52300d9dd936.mp4 +2025-08-18 23:08:37 - INFO - [4b02a42f-ea2a-48f9-982a-52300d9dd936] Cleaned up temporary frame directory: temp_videos/4b02a42f-ea2a-48f9-982a-52300d9dd936 +2025-08-18 23:08:37 - INFO - [42a5f0a1-e4c8-4bd0-aec9-1dc8f805a3b1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_083.mp4' +2025-08-18 23:08:37 - INFO - [42a5f0a1-e4c8-4bd0-aec9-1dc8f805a3b1] Video saved to temporary file: temp_videos/42a5f0a1-e4c8-4bd0-aec9-1dc8f805a3b1.mp4 +2025-08-18 23:08:37 - INFO - [42a5f0a1-e4c8-4bd0-aec9-1dc8f805a3b1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:08:42 - INFO - [42a5f0a1-e4c8-4bd0-aec9-1dc8f805a3b1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:08:42 - INFO - [42a5f0a1-e4c8-4bd0-aec9-1dc8f805a3b1] 30 frames saved to temp_videos/42a5f0a1-e4c8-4bd0-aec9-1dc8f805a3b1 +2025-08-18 23:08:43 - INFO - Prompt token length: 2276 +2025-08-18 23:08:53 - INFO - Tokens per second: 12.089796796705802, Peak GPU memory MB: 4498.375 +2025-08-18 23:08:53 - INFO - [42a5f0a1-e4c8-4bd0-aec9-1dc8f805a3b1] Inference time: 16.04 seconds, CPU usage: 49.5%, CPU core utilization: [74.8, 38.0, 46.3, 39.0] +2025-08-18 23:08:53 - INFO - [42a5f0a1-e4c8-4bd0-aec9-1dc8f805a3b1] Cleaned up temporary file: temp_videos/42a5f0a1-e4c8-4bd0-aec9-1dc8f805a3b1.mp4 +2025-08-18 23:08:53 - INFO - [42a5f0a1-e4c8-4bd0-aec9-1dc8f805a3b1] Cleaned up temporary frame directory: temp_videos/42a5f0a1-e4c8-4bd0-aec9-1dc8f805a3b1 +2025-08-18 23:08:53 - INFO - [5629ae35-2cf2-416d-bf4a-e92d95d9db54] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_084.mp4' +2025-08-18 23:08:53 - INFO - [5629ae35-2cf2-416d-bf4a-e92d95d9db54] Video saved to temporary file: temp_videos/5629ae35-2cf2-416d-bf4a-e92d95d9db54.mp4 +2025-08-18 23:08:53 - INFO - [5629ae35-2cf2-416d-bf4a-e92d95d9db54] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:08:58 - INFO - [5629ae35-2cf2-416d-bf4a-e92d95d9db54] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:08:59 - INFO - [5629ae35-2cf2-416d-bf4a-e92d95d9db54] 30 frames saved to temp_videos/5629ae35-2cf2-416d-bf4a-e92d95d9db54 +2025-08-18 23:08:59 - INFO - Prompt token length: 2276 +2025-08-18 23:09:07 - INFO - Tokens per second: 10.951788067871178, Peak GPU memory MB: 4498.375 +2025-08-18 23:09:07 - INFO - [5629ae35-2cf2-416d-bf4a-e92d95d9db54] Inference time: 13.59 seconds, CPU usage: 53.6%, CPU core utilization: [42.3, 83.1, 39.1, 49.7] +2025-08-18 23:09:07 - INFO - [5629ae35-2cf2-416d-bf4a-e92d95d9db54] Cleaned up temporary file: temp_videos/5629ae35-2cf2-416d-bf4a-e92d95d9db54.mp4 +2025-08-18 23:09:07 - INFO - [5629ae35-2cf2-416d-bf4a-e92d95d9db54] Cleaned up temporary frame directory: temp_videos/5629ae35-2cf2-416d-bf4a-e92d95d9db54 +2025-08-18 23:09:07 - INFO - [841ca3fb-9bf0-4a03-8226-ef5c4f17975f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_085.mp4' +2025-08-18 23:09:07 - INFO - [841ca3fb-9bf0-4a03-8226-ef5c4f17975f] Video saved to temporary file: temp_videos/841ca3fb-9bf0-4a03-8226-ef5c4f17975f.mp4 +2025-08-18 23:09:07 - INFO - [841ca3fb-9bf0-4a03-8226-ef5c4f17975f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:09:12 - INFO - [841ca3fb-9bf0-4a03-8226-ef5c4f17975f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:09:13 - INFO - [841ca3fb-9bf0-4a03-8226-ef5c4f17975f] 30 frames saved to temp_videos/841ca3fb-9bf0-4a03-8226-ef5c4f17975f +2025-08-18 23:09:13 - INFO - Prompt token length: 2276 +2025-08-18 23:09:20 - INFO - Tokens per second: 10.650609352241108, Peak GPU memory MB: 4498.375 +2025-08-18 23:09:20 - INFO - [841ca3fb-9bf0-4a03-8226-ef5c4f17975f] Inference time: 13.29 seconds, CPU usage: 56.2%, CPU core utilization: [48.6, 52.6, 55.3, 68.2] +2025-08-18 23:09:20 - INFO - [841ca3fb-9bf0-4a03-8226-ef5c4f17975f] Cleaned up temporary file: temp_videos/841ca3fb-9bf0-4a03-8226-ef5c4f17975f.mp4 +2025-08-18 23:09:20 - INFO - [841ca3fb-9bf0-4a03-8226-ef5c4f17975f] Cleaned up temporary frame directory: temp_videos/841ca3fb-9bf0-4a03-8226-ef5c4f17975f +2025-08-18 23:09:20 - INFO - [987a4569-c7d9-4188-9e20-c82653bc42a0] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_086.mp4' +2025-08-18 23:09:20 - INFO - [987a4569-c7d9-4188-9e20-c82653bc42a0] Video saved to temporary file: temp_videos/987a4569-c7d9-4188-9e20-c82653bc42a0.mp4 +2025-08-18 23:09:20 - INFO - [987a4569-c7d9-4188-9e20-c82653bc42a0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:09:26 - INFO - [987a4569-c7d9-4188-9e20-c82653bc42a0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:09:26 - INFO - [987a4569-c7d9-4188-9e20-c82653bc42a0] 30 frames saved to temp_videos/987a4569-c7d9-4188-9e20-c82653bc42a0 +2025-08-18 23:09:26 - INFO - Prompt token length: 2276 +2025-08-18 23:09:33 - INFO - Tokens per second: 10.625347178131712, Peak GPU memory MB: 4498.375 +2025-08-18 23:09:33 - INFO - [987a4569-c7d9-4188-9e20-c82653bc42a0] Inference time: 13.10 seconds, CPU usage: 55.1%, CPU core utilization: [45.7, 59.9, 41.6, 73.2] +2025-08-18 23:09:33 - INFO - [987a4569-c7d9-4188-9e20-c82653bc42a0] Cleaned up temporary file: temp_videos/987a4569-c7d9-4188-9e20-c82653bc42a0.mp4 +2025-08-18 23:09:33 - INFO - [987a4569-c7d9-4188-9e20-c82653bc42a0] Cleaned up temporary frame directory: temp_videos/987a4569-c7d9-4188-9e20-c82653bc42a0 +2025-08-18 23:09:33 - INFO - [0dc8e783-7808-4514-8744-cc61187ff2e3] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_087.mp4' +2025-08-18 23:09:33 - INFO - [0dc8e783-7808-4514-8744-cc61187ff2e3] Video saved to temporary file: temp_videos/0dc8e783-7808-4514-8744-cc61187ff2e3.mp4 +2025-08-18 23:09:33 - INFO - [0dc8e783-7808-4514-8744-cc61187ff2e3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:09:39 - INFO - [0dc8e783-7808-4514-8744-cc61187ff2e3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:09:39 - INFO - [0dc8e783-7808-4514-8744-cc61187ff2e3] 30 frames saved to temp_videos/0dc8e783-7808-4514-8744-cc61187ff2e3 +2025-08-18 23:09:40 - INFO - Prompt token length: 2276 +2025-08-18 23:09:58 - INFO - Tokens per second: 13.438866702473879, Peak GPU memory MB: 4498.375 +2025-08-18 23:09:58 - INFO - [0dc8e783-7808-4514-8744-cc61187ff2e3] Inference time: 25.00 seconds, CPU usage: 44.4%, CPU core utilization: [31.1, 25.6, 89.8, 31.1] +2025-08-18 23:09:58 - INFO - [0dc8e783-7808-4514-8744-cc61187ff2e3] Cleaned up temporary file: temp_videos/0dc8e783-7808-4514-8744-cc61187ff2e3.mp4 +2025-08-18 23:09:58 - INFO - [0dc8e783-7808-4514-8744-cc61187ff2e3] Cleaned up temporary frame directory: temp_videos/0dc8e783-7808-4514-8744-cc61187ff2e3 +2025-08-18 23:09:58 - INFO - [e7161349-7c02-4f5b-909c-387cf63d58ee] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_088.mp4' +2025-08-18 23:09:58 - INFO - [e7161349-7c02-4f5b-909c-387cf63d58ee] Video saved to temporary file: temp_videos/e7161349-7c02-4f5b-909c-387cf63d58ee.mp4 +2025-08-18 23:09:58 - INFO - [e7161349-7c02-4f5b-909c-387cf63d58ee] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:10:04 - INFO - [e7161349-7c02-4f5b-909c-387cf63d58ee] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:10:04 - INFO - [e7161349-7c02-4f5b-909c-387cf63d58ee] 30 frames saved to temp_videos/e7161349-7c02-4f5b-909c-387cf63d58ee +2025-08-18 23:10:05 - INFO - Prompt token length: 2276 +2025-08-18 23:10:11 - INFO - Tokens per second: 10.261336029912457, Peak GPU memory MB: 4498.375 +2025-08-18 23:10:11 - INFO - [e7161349-7c02-4f5b-909c-387cf63d58ee] Inference time: 12.51 seconds, CPU usage: 57.7%, CPU core utilization: [46.2, 51.0, 43.7, 90.0] +2025-08-18 23:10:11 - INFO - [e7161349-7c02-4f5b-909c-387cf63d58ee] Cleaned up temporary file: temp_videos/e7161349-7c02-4f5b-909c-387cf63d58ee.mp4 +2025-08-18 23:10:11 - INFO - [e7161349-7c02-4f5b-909c-387cf63d58ee] Cleaned up temporary frame directory: temp_videos/e7161349-7c02-4f5b-909c-387cf63d58ee +2025-08-18 23:10:11 - INFO - [0a62e439-75c5-4c37-97d8-1f4c87867626] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_089.mp4' +2025-08-18 23:10:11 - INFO - [0a62e439-75c5-4c37-97d8-1f4c87867626] Video saved to temporary file: temp_videos/0a62e439-75c5-4c37-97d8-1f4c87867626.mp4 +2025-08-18 23:10:11 - INFO - [0a62e439-75c5-4c37-97d8-1f4c87867626] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:10:16 - INFO - [0a62e439-75c5-4c37-97d8-1f4c87867626] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:10:16 - INFO - [0a62e439-75c5-4c37-97d8-1f4c87867626] 30 frames saved to temp_videos/0a62e439-75c5-4c37-97d8-1f4c87867626 +2025-08-18 23:10:16 - INFO - Prompt token length: 2276 +2025-08-18 23:10:24 - INFO - Tokens per second: 10.88756330840412, Peak GPU memory MB: 4498.375 +2025-08-18 23:10:24 - INFO - [0a62e439-75c5-4c37-97d8-1f4c87867626] Inference time: 12.64 seconds, CPU usage: 52.4%, CPU core utilization: [45.3, 41.2, 83.7, 39.1] +2025-08-18 23:10:24 - INFO - [0a62e439-75c5-4c37-97d8-1f4c87867626] Cleaned up temporary file: temp_videos/0a62e439-75c5-4c37-97d8-1f4c87867626.mp4 +2025-08-18 23:10:24 - INFO - [0a62e439-75c5-4c37-97d8-1f4c87867626] Cleaned up temporary frame directory: temp_videos/0a62e439-75c5-4c37-97d8-1f4c87867626 +2025-08-18 23:10:24 - INFO - [30dd0fcd-6bae-4ceb-a580-d2a23f37b672] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_090.mp4' +2025-08-18 23:10:24 - INFO - [30dd0fcd-6bae-4ceb-a580-d2a23f37b672] Video saved to temporary file: temp_videos/30dd0fcd-6bae-4ceb-a580-d2a23f37b672.mp4 +2025-08-18 23:10:24 - INFO - [30dd0fcd-6bae-4ceb-a580-d2a23f37b672] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:10:29 - INFO - [30dd0fcd-6bae-4ceb-a580-d2a23f37b672] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:10:29 - INFO - [30dd0fcd-6bae-4ceb-a580-d2a23f37b672] 30 frames saved to temp_videos/30dd0fcd-6bae-4ceb-a580-d2a23f37b672 +2025-08-18 23:10:30 - INFO - Prompt token length: 2276 +2025-08-18 23:10:37 - INFO - Tokens per second: 10.733051985105094, Peak GPU memory MB: 4498.375 +2025-08-18 23:10:37 - INFO - [30dd0fcd-6bae-4ceb-a580-d2a23f37b672] Inference time: 13.56 seconds, CPU usage: 54.8%, CPU core utilization: [45.4, 90.9, 39.9, 43.1] +2025-08-18 23:10:37 - INFO - [30dd0fcd-6bae-4ceb-a580-d2a23f37b672] Cleaned up temporary file: temp_videos/30dd0fcd-6bae-4ceb-a580-d2a23f37b672.mp4 +2025-08-18 23:10:37 - INFO - [30dd0fcd-6bae-4ceb-a580-d2a23f37b672] Cleaned up temporary frame directory: temp_videos/30dd0fcd-6bae-4ceb-a580-d2a23f37b672 +2025-08-18 23:10:37 - INFO - [98416dc3-19c1-4d77-9236-f16070503b1d] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_091.mp4' +2025-08-18 23:10:37 - INFO - [98416dc3-19c1-4d77-9236-f16070503b1d] Video saved to temporary file: temp_videos/98416dc3-19c1-4d77-9236-f16070503b1d.mp4 +2025-08-18 23:10:37 - INFO - [98416dc3-19c1-4d77-9236-f16070503b1d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:10:42 - INFO - [98416dc3-19c1-4d77-9236-f16070503b1d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:10:42 - INFO - [98416dc3-19c1-4d77-9236-f16070503b1d] 30 frames saved to temp_videos/98416dc3-19c1-4d77-9236-f16070503b1d +2025-08-18 23:10:43 - INFO - Prompt token length: 2276 +2025-08-18 23:10:49 - INFO - Tokens per second: 9.848084413506196, Peak GPU memory MB: 4498.375 +2025-08-18 23:10:49 - INFO - [98416dc3-19c1-4d77-9236-f16070503b1d] Inference time: 11.43 seconds, CPU usage: 54.7%, CPU core utilization: [43.7, 43.7, 40.6, 90.8] +2025-08-18 23:10:49 - INFO - [98416dc3-19c1-4d77-9236-f16070503b1d] Cleaned up temporary file: temp_videos/98416dc3-19c1-4d77-9236-f16070503b1d.mp4 +2025-08-18 23:10:49 - INFO - [98416dc3-19c1-4d77-9236-f16070503b1d] Cleaned up temporary frame directory: temp_videos/98416dc3-19c1-4d77-9236-f16070503b1d +2025-08-18 23:10:49 - INFO - [c58fb2f2-4526-478d-8960-a52745cae97c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_092.mp4' +2025-08-18 23:10:49 - INFO - [c58fb2f2-4526-478d-8960-a52745cae97c] Video saved to temporary file: temp_videos/c58fb2f2-4526-478d-8960-a52745cae97c.mp4 +2025-08-18 23:10:49 - INFO - [c58fb2f2-4526-478d-8960-a52745cae97c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:10:53 - INFO - [c58fb2f2-4526-478d-8960-a52745cae97c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:10:53 - INFO - [c58fb2f2-4526-478d-8960-a52745cae97c] 30 frames saved to temp_videos/c58fb2f2-4526-478d-8960-a52745cae97c +2025-08-18 23:10:53 - INFO - Prompt token length: 2276 +2025-08-18 23:10:58 - INFO - Tokens per second: 8.88533753629756, Peak GPU memory MB: 4498.375 +2025-08-18 23:10:58 - INFO - [c58fb2f2-4526-478d-8960-a52745cae97c] Inference time: 9.65 seconds, CPU usage: 54.6%, CPU core utilization: [46.9, 76.8, 39.4, 55.5] +2025-08-18 23:10:58 - INFO - [c58fb2f2-4526-478d-8960-a52745cae97c] Cleaned up temporary file: temp_videos/c58fb2f2-4526-478d-8960-a52745cae97c.mp4 +2025-08-18 23:10:58 - INFO - [c58fb2f2-4526-478d-8960-a52745cae97c] Cleaned up temporary frame directory: temp_videos/c58fb2f2-4526-478d-8960-a52745cae97c +2025-08-18 23:10:58 - INFO - [5361da2b-9bc7-43d8-b8ad-da59241eeb55] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_093.mp4' +2025-08-18 23:10:58 - INFO - [5361da2b-9bc7-43d8-b8ad-da59241eeb55] Video saved to temporary file: temp_videos/5361da2b-9bc7-43d8-b8ad-da59241eeb55.mp4 +2025-08-18 23:10:58 - INFO - [5361da2b-9bc7-43d8-b8ad-da59241eeb55] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:11:01 - INFO - [5361da2b-9bc7-43d8-b8ad-da59241eeb55] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:11:01 - INFO - [5361da2b-9bc7-43d8-b8ad-da59241eeb55] 30 frames saved to temp_videos/5361da2b-9bc7-43d8-b8ad-da59241eeb55 +2025-08-18 23:11:01 - INFO - Prompt token length: 2276 +2025-08-18 23:11:09 - INFO - Tokens per second: 10.976556836391119, Peak GPU memory MB: 4498.375 +2025-08-18 23:11:09 - INFO - [5361da2b-9bc7-43d8-b8ad-da59241eeb55] Inference time: 10.78 seconds, CPU usage: 42.7%, CPU core utilization: [26.9, 33.6, 30.2, 79.9] +2025-08-18 23:11:09 - INFO - [5361da2b-9bc7-43d8-b8ad-da59241eeb55] Cleaned up temporary file: temp_videos/5361da2b-9bc7-43d8-b8ad-da59241eeb55.mp4 +2025-08-18 23:11:09 - INFO - [5361da2b-9bc7-43d8-b8ad-da59241eeb55] Cleaned up temporary frame directory: temp_videos/5361da2b-9bc7-43d8-b8ad-da59241eeb55 +2025-08-18 23:11:09 - INFO - [69ad63b6-b325-40ad-a235-3ffb0f44b107] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_094.mp4' +2025-08-18 23:11:09 - INFO - [69ad63b6-b325-40ad-a235-3ffb0f44b107] Video saved to temporary file: temp_videos/69ad63b6-b325-40ad-a235-3ffb0f44b107.mp4 +2025-08-18 23:11:09 - INFO - [69ad63b6-b325-40ad-a235-3ffb0f44b107] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:11:11 - INFO - [69ad63b6-b325-40ad-a235-3ffb0f44b107] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:11:11 - INFO - [69ad63b6-b325-40ad-a235-3ffb0f44b107] 30 frames saved to temp_videos/69ad63b6-b325-40ad-a235-3ffb0f44b107 +2025-08-18 23:11:12 - INFO - Prompt token length: 2276 +2025-08-18 23:11:20 - INFO - Tokens per second: 11.347500730261487, Peak GPU memory MB: 4498.375 +2025-08-18 23:11:20 - INFO - [69ad63b6-b325-40ad-a235-3ffb0f44b107] Inference time: 10.68 seconds, CPU usage: 41.4%, CPU core utilization: [36.0, 22.7, 76.2, 30.7] +2025-08-18 23:11:20 - INFO - [69ad63b6-b325-40ad-a235-3ffb0f44b107] Cleaned up temporary file: temp_videos/69ad63b6-b325-40ad-a235-3ffb0f44b107.mp4 +2025-08-18 23:11:20 - INFO - [69ad63b6-b325-40ad-a235-3ffb0f44b107] Cleaned up temporary frame directory: temp_videos/69ad63b6-b325-40ad-a235-3ffb0f44b107 +2025-08-18 23:11:20 - INFO - [2c58f485-d411-4796-b1a2-4992c0058b9c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-18 23:11:20 - INFO - [2c58f485-d411-4796-b1a2-4992c0058b9c] Video saved to temporary file: temp_videos/2c58f485-d411-4796-b1a2-4992c0058b9c.mp4 +2025-08-18 23:11:20 - INFO - [2c58f485-d411-4796-b1a2-4992c0058b9c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:11:24 - INFO - [2c58f485-d411-4796-b1a2-4992c0058b9c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:11:24 - INFO - [2c58f485-d411-4796-b1a2-4992c0058b9c] 30 frames saved to temp_videos/2c58f485-d411-4796-b1a2-4992c0058b9c +2025-08-18 23:11:24 - INFO - Prompt token length: 2276 +2025-08-18 23:11:43 - INFO - Tokens per second: 13.40059383147904, Peak GPU memory MB: 4498.375 +2025-08-18 23:11:43 - INFO - [2c58f485-d411-4796-b1a2-4992c0058b9c] Inference time: 22.91 seconds, CPU usage: 39.6%, CPU core utilization: [45.6, 27.3, 30.7, 54.9] +2025-08-18 23:11:43 - INFO - [2c58f485-d411-4796-b1a2-4992c0058b9c] Cleaned up temporary file: temp_videos/2c58f485-d411-4796-b1a2-4992c0058b9c.mp4 +2025-08-18 23:11:43 - INFO - [2c58f485-d411-4796-b1a2-4992c0058b9c] Cleaned up temporary frame directory: temp_videos/2c58f485-d411-4796-b1a2-4992c0058b9c +2025-08-18 23:11:43 - INFO - [77ae1bc5-c6bf-4876-8f52-328824af2913] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_002.mp4' +2025-08-18 23:11:43 - INFO - [77ae1bc5-c6bf-4876-8f52-328824af2913] Video saved to temporary file: temp_videos/77ae1bc5-c6bf-4876-8f52-328824af2913.mp4 +2025-08-18 23:11:43 - INFO - [77ae1bc5-c6bf-4876-8f52-328824af2913] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:11:49 - INFO - [77ae1bc5-c6bf-4876-8f52-328824af2913] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:11:49 - INFO - [77ae1bc5-c6bf-4876-8f52-328824af2913] 30 frames saved to temp_videos/77ae1bc5-c6bf-4876-8f52-328824af2913 +2025-08-18 23:11:50 - INFO - Prompt token length: 2276 +2025-08-18 23:11:56 - INFO - Tokens per second: 10.05763431796692, Peak GPU memory MB: 4498.375 +2025-08-18 23:11:56 - INFO - [77ae1bc5-c6bf-4876-8f52-328824af2913] Inference time: 12.75 seconds, CPU usage: 58.9%, CPU core utilization: [57.3, 49.5, 77.4, 51.4] +2025-08-18 23:11:56 - INFO - [77ae1bc5-c6bf-4876-8f52-328824af2913] Cleaned up temporary file: temp_videos/77ae1bc5-c6bf-4876-8f52-328824af2913.mp4 +2025-08-18 23:11:56 - INFO - [77ae1bc5-c6bf-4876-8f52-328824af2913] Cleaned up temporary frame directory: temp_videos/77ae1bc5-c6bf-4876-8f52-328824af2913 +2025-08-18 23:11:56 - INFO - [7beb0d6c-4fd9-4da6-b736-2f72a5a6565f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_003.mp4' +2025-08-18 23:11:56 - INFO - [7beb0d6c-4fd9-4da6-b736-2f72a5a6565f] Video saved to temporary file: temp_videos/7beb0d6c-4fd9-4da6-b736-2f72a5a6565f.mp4 +2025-08-18 23:11:56 - INFO - [7beb0d6c-4fd9-4da6-b736-2f72a5a6565f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:12:01 - INFO - [7beb0d6c-4fd9-4da6-b736-2f72a5a6565f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:12:01 - INFO - [7beb0d6c-4fd9-4da6-b736-2f72a5a6565f] 30 frames saved to temp_videos/7beb0d6c-4fd9-4da6-b736-2f72a5a6565f +2025-08-18 23:12:02 - INFO - Prompt token length: 2276 +2025-08-18 23:12:08 - INFO - Tokens per second: 10.362352898648883, Peak GPU memory MB: 4498.375 +2025-08-18 23:12:08 - INFO - [7beb0d6c-4fd9-4da6-b736-2f72a5a6565f] Inference time: 12.63 seconds, CPU usage: 55.6%, CPU core utilization: [45.3, 41.0, 40.3, 95.7] +2025-08-18 23:12:08 - INFO - [7beb0d6c-4fd9-4da6-b736-2f72a5a6565f] Cleaned up temporary file: temp_videos/7beb0d6c-4fd9-4da6-b736-2f72a5a6565f.mp4 +2025-08-18 23:12:08 - INFO - [7beb0d6c-4fd9-4da6-b736-2f72a5a6565f] Cleaned up temporary frame directory: temp_videos/7beb0d6c-4fd9-4da6-b736-2f72a5a6565f +2025-08-18 23:12:09 - INFO - [7ad52cc1-a4bc-4a7d-b9cd-a3010ea235b3] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_004.mp4' +2025-08-18 23:12:09 - INFO - [7ad52cc1-a4bc-4a7d-b9cd-a3010ea235b3] Video saved to temporary file: temp_videos/7ad52cc1-a4bc-4a7d-b9cd-a3010ea235b3.mp4 +2025-08-18 23:12:09 - INFO - [7ad52cc1-a4bc-4a7d-b9cd-a3010ea235b3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:12:14 - INFO - [7ad52cc1-a4bc-4a7d-b9cd-a3010ea235b3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:12:14 - INFO - [7ad52cc1-a4bc-4a7d-b9cd-a3010ea235b3] 30 frames saved to temp_videos/7ad52cc1-a4bc-4a7d-b9cd-a3010ea235b3 +2025-08-18 23:12:14 - INFO - Prompt token length: 2276 +2025-08-18 23:12:33 - INFO - Tokens per second: 13.296233255859406, Peak GPU memory MB: 4498.375 +2025-08-18 23:12:33 - INFO - [7ad52cc1-a4bc-4a7d-b9cd-a3010ea235b3] Inference time: 24.65 seconds, CPU usage: 42.4%, CPU core utilization: [26.8, 21.9, 30.7, 90.2] +2025-08-18 23:12:33 - INFO - [7ad52cc1-a4bc-4a7d-b9cd-a3010ea235b3] Cleaned up temporary file: temp_videos/7ad52cc1-a4bc-4a7d-b9cd-a3010ea235b3.mp4 +2025-08-18 23:12:33 - INFO - [7ad52cc1-a4bc-4a7d-b9cd-a3010ea235b3] Cleaned up temporary frame directory: temp_videos/7ad52cc1-a4bc-4a7d-b9cd-a3010ea235b3 +2025-08-18 23:12:33 - INFO - [c16b56c5-6d8e-41b3-94cd-7af1f316e289] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_005.mp4' +2025-08-18 23:12:33 - INFO - [c16b56c5-6d8e-41b3-94cd-7af1f316e289] Video saved to temporary file: temp_videos/c16b56c5-6d8e-41b3-94cd-7af1f316e289.mp4 +2025-08-18 23:12:33 - INFO - [c16b56c5-6d8e-41b3-94cd-7af1f316e289] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:12:38 - INFO - [c16b56c5-6d8e-41b3-94cd-7af1f316e289] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:12:38 - INFO - [c16b56c5-6d8e-41b3-94cd-7af1f316e289] 30 frames saved to temp_videos/c16b56c5-6d8e-41b3-94cd-7af1f316e289 +2025-08-18 23:12:39 - INFO - Prompt token length: 2276 +2025-08-18 23:12:47 - INFO - Tokens per second: 10.711200379469911, Peak GPU memory MB: 4498.375 +2025-08-18 23:12:47 - INFO - [c16b56c5-6d8e-41b3-94cd-7af1f316e289] Inference time: 13.55 seconds, CPU usage: 55.0%, CPU core utilization: [41.9, 41.9, 43.0, 92.8] +2025-08-18 23:12:47 - INFO - [c16b56c5-6d8e-41b3-94cd-7af1f316e289] Cleaned up temporary file: temp_videos/c16b56c5-6d8e-41b3-94cd-7af1f316e289.mp4 +2025-08-18 23:12:47 - INFO - [c16b56c5-6d8e-41b3-94cd-7af1f316e289] Cleaned up temporary frame directory: temp_videos/c16b56c5-6d8e-41b3-94cd-7af1f316e289 +2025-08-18 23:12:47 - INFO - [8f69bcd2-297f-4a0b-bd3b-4bbe15bb738e] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_006.mp4' +2025-08-18 23:12:47 - INFO - [8f69bcd2-297f-4a0b-bd3b-4bbe15bb738e] Video saved to temporary file: temp_videos/8f69bcd2-297f-4a0b-bd3b-4bbe15bb738e.mp4 +2025-08-18 23:12:47 - INFO - [8f69bcd2-297f-4a0b-bd3b-4bbe15bb738e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:12:53 - INFO - [8f69bcd2-297f-4a0b-bd3b-4bbe15bb738e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:12:53 - INFO - [8f69bcd2-297f-4a0b-bd3b-4bbe15bb738e] 30 frames saved to temp_videos/8f69bcd2-297f-4a0b-bd3b-4bbe15bb738e +2025-08-18 23:12:54 - INFO - Prompt token length: 2276 +2025-08-18 23:13:01 - INFO - Tokens per second: 10.554908172002714, Peak GPU memory MB: 4498.375 +2025-08-18 23:13:01 - INFO - [8f69bcd2-297f-4a0b-bd3b-4bbe15bb738e] Inference time: 14.00 seconds, CPU usage: 58.1%, CPU core utilization: [53.2, 60.0, 44.6, 74.4] +2025-08-18 23:13:01 - INFO - [8f69bcd2-297f-4a0b-bd3b-4bbe15bb738e] Cleaned up temporary file: temp_videos/8f69bcd2-297f-4a0b-bd3b-4bbe15bb738e.mp4 +2025-08-18 23:13:01 - INFO - [8f69bcd2-297f-4a0b-bd3b-4bbe15bb738e] Cleaned up temporary frame directory: temp_videos/8f69bcd2-297f-4a0b-bd3b-4bbe15bb738e +2025-08-18 23:13:01 - INFO - [070c47c1-530d-48c4-aa90-2074ecf4740c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_007.mp4' +2025-08-18 23:13:01 - INFO - [070c47c1-530d-48c4-aa90-2074ecf4740c] Video saved to temporary file: temp_videos/070c47c1-530d-48c4-aa90-2074ecf4740c.mp4 +2025-08-18 23:13:01 - INFO - [070c47c1-530d-48c4-aa90-2074ecf4740c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:13:06 - INFO - [070c47c1-530d-48c4-aa90-2074ecf4740c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:13:07 - INFO - [070c47c1-530d-48c4-aa90-2074ecf4740c] 30 frames saved to temp_videos/070c47c1-530d-48c4-aa90-2074ecf4740c +2025-08-18 23:13:07 - INFO - Prompt token length: 2276 +2025-08-18 23:13:14 - INFO - Tokens per second: 8.628679137685621, Peak GPU memory MB: 4498.375 +2025-08-18 23:13:14 - INFO - [070c47c1-530d-48c4-aa90-2074ecf4740c] Inference time: 12.56 seconds, CPU usage: 74.4%, CPU core utilization: [67.5, 68.4, 67.8, 94.0] +2025-08-18 23:13:14 - INFO - [070c47c1-530d-48c4-aa90-2074ecf4740c] Cleaned up temporary file: temp_videos/070c47c1-530d-48c4-aa90-2074ecf4740c.mp4 +2025-08-18 23:13:14 - INFO - [070c47c1-530d-48c4-aa90-2074ecf4740c] Cleaned up temporary frame directory: temp_videos/070c47c1-530d-48c4-aa90-2074ecf4740c +2025-08-18 23:13:14 - INFO - [8ab3be6f-5e8f-4019-bd95-7b134abba037] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_008.mp4' +2025-08-18 23:13:14 - INFO - [8ab3be6f-5e8f-4019-bd95-7b134abba037] Video saved to temporary file: temp_videos/8ab3be6f-5e8f-4019-bd95-7b134abba037.mp4 +2025-08-18 23:13:14 - INFO - [8ab3be6f-5e8f-4019-bd95-7b134abba037] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:13:19 - INFO - [8ab3be6f-5e8f-4019-bd95-7b134abba037] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:13:19 - INFO - [8ab3be6f-5e8f-4019-bd95-7b134abba037] 30 frames saved to temp_videos/8ab3be6f-5e8f-4019-bd95-7b134abba037 +2025-08-18 23:13:20 - INFO - Prompt token length: 2276 +2025-08-18 23:13:25 - INFO - Tokens per second: 9.15227998451498, Peak GPU memory MB: 4498.375 +2025-08-18 23:13:25 - INFO - [8ab3be6f-5e8f-4019-bd95-7b134abba037] Inference time: 11.11 seconds, CPU usage: 60.6%, CPU core utilization: [65.4, 50.8, 73.7, 52.5] +2025-08-18 23:13:25 - INFO - [8ab3be6f-5e8f-4019-bd95-7b134abba037] Cleaned up temporary file: temp_videos/8ab3be6f-5e8f-4019-bd95-7b134abba037.mp4 +2025-08-18 23:13:25 - INFO - [8ab3be6f-5e8f-4019-bd95-7b134abba037] Cleaned up temporary frame directory: temp_videos/8ab3be6f-5e8f-4019-bd95-7b134abba037 +2025-08-18 23:13:25 - INFO - [ec016550-cc85-48d7-b4dc-ba7c5c941861] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_009.mp4' +2025-08-18 23:13:25 - INFO - [ec016550-cc85-48d7-b4dc-ba7c5c941861] Video saved to temporary file: temp_videos/ec016550-cc85-48d7-b4dc-ba7c5c941861.mp4 +2025-08-18 23:13:25 - INFO - [ec016550-cc85-48d7-b4dc-ba7c5c941861] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:13:30 - INFO - [ec016550-cc85-48d7-b4dc-ba7c5c941861] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:13:30 - INFO - [ec016550-cc85-48d7-b4dc-ba7c5c941861] 30 frames saved to temp_videos/ec016550-cc85-48d7-b4dc-ba7c5c941861 +2025-08-18 23:13:31 - INFO - Prompt token length: 2276 +2025-08-18 23:13:38 - INFO - Tokens per second: 10.469246141743787, Peak GPU memory MB: 4498.375 +2025-08-18 23:13:38 - INFO - [ec016550-cc85-48d7-b4dc-ba7c5c941861] Inference time: 12.91 seconds, CPU usage: 56.9%, CPU core utilization: [51.9, 50.0, 82.6, 43.0] +2025-08-18 23:13:38 - INFO - [ec016550-cc85-48d7-b4dc-ba7c5c941861] Cleaned up temporary file: temp_videos/ec016550-cc85-48d7-b4dc-ba7c5c941861.mp4 +2025-08-18 23:13:38 - INFO - [ec016550-cc85-48d7-b4dc-ba7c5c941861] Cleaned up temporary frame directory: temp_videos/ec016550-cc85-48d7-b4dc-ba7c5c941861 +2025-08-18 23:13:38 - INFO - [6ce9f890-2260-4d17-b801-b3f9567d0639] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_010.mp4' +2025-08-18 23:13:38 - INFO - [6ce9f890-2260-4d17-b801-b3f9567d0639] Video saved to temporary file: temp_videos/6ce9f890-2260-4d17-b801-b3f9567d0639.mp4 +2025-08-18 23:13:38 - INFO - [6ce9f890-2260-4d17-b801-b3f9567d0639] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:13:43 - INFO - [6ce9f890-2260-4d17-b801-b3f9567d0639] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:13:43 - INFO - [6ce9f890-2260-4d17-b801-b3f9567d0639] 30 frames saved to temp_videos/6ce9f890-2260-4d17-b801-b3f9567d0639 +2025-08-18 23:13:44 - INFO - Prompt token length: 2276 +2025-08-18 23:14:02 - INFO - Tokens per second: 13.449126642449913, Peak GPU memory MB: 4498.375 +2025-08-18 23:14:02 - INFO - [6ce9f890-2260-4d17-b801-b3f9567d0639] Inference time: 24.43 seconds, CPU usage: 42.5%, CPU core utilization: [33.7, 26.9, 85.1, 24.2] +2025-08-18 23:14:02 - INFO - [6ce9f890-2260-4d17-b801-b3f9567d0639] Cleaned up temporary file: temp_videos/6ce9f890-2260-4d17-b801-b3f9567d0639.mp4 +2025-08-18 23:14:02 - INFO - [6ce9f890-2260-4d17-b801-b3f9567d0639] Cleaned up temporary frame directory: temp_videos/6ce9f890-2260-4d17-b801-b3f9567d0639 +2025-08-18 23:14:02 - INFO - [9f0cf22e-856c-4854-9c51-08e35282949b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_011.mp4' +2025-08-18 23:14:02 - INFO - [9f0cf22e-856c-4854-9c51-08e35282949b] Video saved to temporary file: temp_videos/9f0cf22e-856c-4854-9c51-08e35282949b.mp4 +2025-08-18 23:14:02 - INFO - [9f0cf22e-856c-4854-9c51-08e35282949b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:14:08 - INFO - [9f0cf22e-856c-4854-9c51-08e35282949b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:14:08 - INFO - [9f0cf22e-856c-4854-9c51-08e35282949b] 30 frames saved to temp_videos/9f0cf22e-856c-4854-9c51-08e35282949b +2025-08-18 23:14:08 - INFO - Prompt token length: 2276 +2025-08-18 23:14:16 - INFO - Tokens per second: 10.74788200235932, Peak GPU memory MB: 4498.375 +2025-08-18 23:14:16 - INFO - [9f0cf22e-856c-4854-9c51-08e35282949b] Inference time: 13.56 seconds, CPU usage: 54.7%, CPU core utilization: [41.7, 38.4, 43.0, 95.6] +2025-08-18 23:14:16 - INFO - [9f0cf22e-856c-4854-9c51-08e35282949b] Cleaned up temporary file: temp_videos/9f0cf22e-856c-4854-9c51-08e35282949b.mp4 +2025-08-18 23:14:16 - INFO - [9f0cf22e-856c-4854-9c51-08e35282949b] Cleaned up temporary frame directory: temp_videos/9f0cf22e-856c-4854-9c51-08e35282949b +2025-08-18 23:14:16 - INFO - [f0f7ad76-3281-4018-aef1-04b65f01e985] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_012.mp4' +2025-08-18 23:14:16 - INFO - [f0f7ad76-3281-4018-aef1-04b65f01e985] Video saved to temporary file: temp_videos/f0f7ad76-3281-4018-aef1-04b65f01e985.mp4 +2025-08-18 23:14:16 - INFO - [f0f7ad76-3281-4018-aef1-04b65f01e985] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:14:22 - INFO - [f0f7ad76-3281-4018-aef1-04b65f01e985] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:14:22 - INFO - [f0f7ad76-3281-4018-aef1-04b65f01e985] 30 frames saved to temp_videos/f0f7ad76-3281-4018-aef1-04b65f01e985 +2025-08-18 23:14:22 - INFO - Prompt token length: 2276 +2025-08-18 23:14:32 - INFO - Tokens per second: 11.649960463588755, Peak GPU memory MB: 4498.375 +2025-08-18 23:14:32 - INFO - [f0f7ad76-3281-4018-aef1-04b65f01e985] Inference time: 15.87 seconds, CPU usage: 51.9%, CPU core utilization: [50.2, 61.0, 37.1, 59.2] +2025-08-18 23:14:32 - INFO - [f0f7ad76-3281-4018-aef1-04b65f01e985] Cleaned up temporary file: temp_videos/f0f7ad76-3281-4018-aef1-04b65f01e985.mp4 +2025-08-18 23:14:32 - INFO - [f0f7ad76-3281-4018-aef1-04b65f01e985] Cleaned up temporary frame directory: temp_videos/f0f7ad76-3281-4018-aef1-04b65f01e985 +2025-08-18 23:14:32 - INFO - [4afe3f18-927c-4666-bf23-ce5a027f7e75] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_013.mp4' +2025-08-18 23:14:32 - INFO - [4afe3f18-927c-4666-bf23-ce5a027f7e75] Video saved to temporary file: temp_videos/4afe3f18-927c-4666-bf23-ce5a027f7e75.mp4 +2025-08-18 23:14:32 - INFO - [4afe3f18-927c-4666-bf23-ce5a027f7e75] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:14:37 - INFO - [4afe3f18-927c-4666-bf23-ce5a027f7e75] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:14:37 - INFO - [4afe3f18-927c-4666-bf23-ce5a027f7e75] 30 frames saved to temp_videos/4afe3f18-927c-4666-bf23-ce5a027f7e75 +2025-08-18 23:14:38 - INFO - Prompt token length: 2276 +2025-08-18 23:14:56 - INFO - Tokens per second: 13.434648062904184, Peak GPU memory MB: 4498.375 +2025-08-18 23:14:56 - INFO - [4afe3f18-927c-4666-bf23-ce5a027f7e75] Inference time: 24.37 seconds, CPU usage: 43.2%, CPU core utilization: [36.8, 35.2, 67.0, 33.6] +2025-08-18 23:14:56 - INFO - [4afe3f18-927c-4666-bf23-ce5a027f7e75] Cleaned up temporary file: temp_videos/4afe3f18-927c-4666-bf23-ce5a027f7e75.mp4 +2025-08-18 23:14:56 - INFO - [4afe3f18-927c-4666-bf23-ce5a027f7e75] Cleaned up temporary frame directory: temp_videos/4afe3f18-927c-4666-bf23-ce5a027f7e75 +2025-08-18 23:14:56 - INFO - [a58897f2-253f-4edb-8fb0-42e61d2983d3] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_014.mp4' +2025-08-18 23:14:56 - INFO - [a58897f2-253f-4edb-8fb0-42e61d2983d3] Video saved to temporary file: temp_videos/a58897f2-253f-4edb-8fb0-42e61d2983d3.mp4 +2025-08-18 23:14:56 - INFO - [a58897f2-253f-4edb-8fb0-42e61d2983d3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:15:01 - INFO - [a58897f2-253f-4edb-8fb0-42e61d2983d3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:15:01 - INFO - [a58897f2-253f-4edb-8fb0-42e61d2983d3] 30 frames saved to temp_videos/a58897f2-253f-4edb-8fb0-42e61d2983d3 +2025-08-18 23:15:02 - INFO - Prompt token length: 2276 +2025-08-18 23:15:20 - INFO - Tokens per second: 13.479950074027617, Peak GPU memory MB: 4498.375 +2025-08-18 23:15:20 - INFO - [a58897f2-253f-4edb-8fb0-42e61d2983d3] Inference time: 24.03 seconds, CPU usage: 42.7%, CPU core utilization: [48.2, 24.3, 70.2, 28.1] +2025-08-18 23:15:20 - INFO - [a58897f2-253f-4edb-8fb0-42e61d2983d3] Cleaned up temporary file: temp_videos/a58897f2-253f-4edb-8fb0-42e61d2983d3.mp4 +2025-08-18 23:15:20 - INFO - [a58897f2-253f-4edb-8fb0-42e61d2983d3] Cleaned up temporary frame directory: temp_videos/a58897f2-253f-4edb-8fb0-42e61d2983d3 +2025-08-18 23:15:20 - INFO - [b1fcd7a6-a728-473c-a23c-39e959e4fd97] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_015.mp4' +2025-08-18 23:15:20 - INFO - [b1fcd7a6-a728-473c-a23c-39e959e4fd97] Video saved to temporary file: temp_videos/b1fcd7a6-a728-473c-a23c-39e959e4fd97.mp4 +2025-08-18 23:15:20 - INFO - [b1fcd7a6-a728-473c-a23c-39e959e4fd97] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:15:26 - INFO - [b1fcd7a6-a728-473c-a23c-39e959e4fd97] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:15:26 - INFO - [b1fcd7a6-a728-473c-a23c-39e959e4fd97] 30 frames saved to temp_videos/b1fcd7a6-a728-473c-a23c-39e959e4fd97 +2025-08-18 23:15:27 - INFO - Prompt token length: 2276 +2025-08-18 23:15:36 - INFO - Tokens per second: 11.640350568059489, Peak GPU memory MB: 4498.375 +2025-08-18 23:15:36 - INFO - [b1fcd7a6-a728-473c-a23c-39e959e4fd97] Inference time: 15.76 seconds, CPU usage: 52.7%, CPU core utilization: [42.0, 34.5, 36.8, 97.4] +2025-08-18 23:15:36 - INFO - [b1fcd7a6-a728-473c-a23c-39e959e4fd97] Cleaned up temporary file: temp_videos/b1fcd7a6-a728-473c-a23c-39e959e4fd97.mp4 +2025-08-18 23:15:36 - INFO - [b1fcd7a6-a728-473c-a23c-39e959e4fd97] Cleaned up temporary frame directory: temp_videos/b1fcd7a6-a728-473c-a23c-39e959e4fd97 +2025-08-18 23:15:36 - INFO - [a52d66d8-a802-4df3-95c1-769f488a71ba] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_016.mp4' +2025-08-18 23:15:36 - INFO - [a52d66d8-a802-4df3-95c1-769f488a71ba] Video saved to temporary file: temp_videos/a52d66d8-a802-4df3-95c1-769f488a71ba.mp4 +2025-08-18 23:15:36 - INFO - [a52d66d8-a802-4df3-95c1-769f488a71ba] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:15:42 - INFO - [a52d66d8-a802-4df3-95c1-769f488a71ba] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:15:42 - INFO - [a52d66d8-a802-4df3-95c1-769f488a71ba] 30 frames saved to temp_videos/a52d66d8-a802-4df3-95c1-769f488a71ba +2025-08-18 23:15:42 - INFO - Prompt token length: 2276 +2025-08-18 23:15:50 - INFO - Tokens per second: 11.043868658200097, Peak GPU memory MB: 4498.375 +2025-08-18 23:15:50 - INFO - [a52d66d8-a802-4df3-95c1-769f488a71ba] Inference time: 14.13 seconds, CPU usage: 54.7%, CPU core utilization: [43.8, 72.0, 41.7, 61.5] +2025-08-18 23:15:50 - INFO - [a52d66d8-a802-4df3-95c1-769f488a71ba] Cleaned up temporary file: temp_videos/a52d66d8-a802-4df3-95c1-769f488a71ba.mp4 +2025-08-18 23:15:50 - INFO - [a52d66d8-a802-4df3-95c1-769f488a71ba] Cleaned up temporary frame directory: temp_videos/a52d66d8-a802-4df3-95c1-769f488a71ba +2025-08-18 23:15:50 - INFO - [f8bef2f4-f05a-42bc-8d22-ac61ace04740] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_017.mp4' +2025-08-18 23:15:50 - INFO - [f8bef2f4-f05a-42bc-8d22-ac61ace04740] Video saved to temporary file: temp_videos/f8bef2f4-f05a-42bc-8d22-ac61ace04740.mp4 +2025-08-18 23:15:50 - INFO - [f8bef2f4-f05a-42bc-8d22-ac61ace04740] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:15:56 - INFO - [f8bef2f4-f05a-42bc-8d22-ac61ace04740] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:15:56 - INFO - [f8bef2f4-f05a-42bc-8d22-ac61ace04740] 30 frames saved to temp_videos/f8bef2f4-f05a-42bc-8d22-ac61ace04740 +2025-08-18 23:15:57 - INFO - Prompt token length: 2276 +2025-08-18 23:16:04 - INFO - Tokens per second: 10.51133078300418, Peak GPU memory MB: 4498.375 +2025-08-18 23:16:04 - INFO - [f8bef2f4-f05a-42bc-8d22-ac61ace04740] Inference time: 13.23 seconds, CPU usage: 54.4%, CPU core utilization: [44.0, 48.0, 42.7, 82.8] +2025-08-18 23:16:04 - INFO - [f8bef2f4-f05a-42bc-8d22-ac61ace04740] Cleaned up temporary file: temp_videos/f8bef2f4-f05a-42bc-8d22-ac61ace04740.mp4 +2025-08-18 23:16:04 - INFO - [f8bef2f4-f05a-42bc-8d22-ac61ace04740] Cleaned up temporary frame directory: temp_videos/f8bef2f4-f05a-42bc-8d22-ac61ace04740 +2025-08-18 23:16:04 - INFO - [c34da43d-575f-4b66-8d40-e83079b38c77] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_018.mp4' +2025-08-18 23:16:04 - INFO - [c34da43d-575f-4b66-8d40-e83079b38c77] Video saved to temporary file: temp_videos/c34da43d-575f-4b66-8d40-e83079b38c77.mp4 +2025-08-18 23:16:04 - INFO - [c34da43d-575f-4b66-8d40-e83079b38c77] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:16:09 - INFO - [c34da43d-575f-4b66-8d40-e83079b38c77] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:16:09 - INFO - [c34da43d-575f-4b66-8d40-e83079b38c77] 30 frames saved to temp_videos/c34da43d-575f-4b66-8d40-e83079b38c77 +2025-08-18 23:16:09 - INFO - Prompt token length: 2276 +2025-08-18 23:16:17 - INFO - Tokens per second: 10.98742027777536, Peak GPU memory MB: 4498.375 +2025-08-18 23:16:17 - INFO - [c34da43d-575f-4b66-8d40-e83079b38c77] Inference time: 13.31 seconds, CPU usage: 53.7%, CPU core utilization: [42.9, 47.3, 39.1, 85.5] +2025-08-18 23:16:17 - INFO - [c34da43d-575f-4b66-8d40-e83079b38c77] Cleaned up temporary file: temp_videos/c34da43d-575f-4b66-8d40-e83079b38c77.mp4 +2025-08-18 23:16:17 - INFO - [c34da43d-575f-4b66-8d40-e83079b38c77] Cleaned up temporary frame directory: temp_videos/c34da43d-575f-4b66-8d40-e83079b38c77 +2025-08-18 23:16:17 - INFO - [1ebe2e39-e0ef-480c-bc3b-0913c92da0b2] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_019.mp4' +2025-08-18 23:16:17 - INFO - [1ebe2e39-e0ef-480c-bc3b-0913c92da0b2] Video saved to temporary file: temp_videos/1ebe2e39-e0ef-480c-bc3b-0913c92da0b2.mp4 +2025-08-18 23:16:17 - INFO - [1ebe2e39-e0ef-480c-bc3b-0913c92da0b2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:16:23 - INFO - [1ebe2e39-e0ef-480c-bc3b-0913c92da0b2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:16:23 - INFO - [1ebe2e39-e0ef-480c-bc3b-0913c92da0b2] 30 frames saved to temp_videos/1ebe2e39-e0ef-480c-bc3b-0913c92da0b2 +2025-08-18 23:16:23 - INFO - Prompt token length: 2276 +2025-08-18 23:16:31 - INFO - Tokens per second: 10.921069856446843, Peak GPU memory MB: 4498.375 +2025-08-18 23:16:31 - INFO - [1ebe2e39-e0ef-480c-bc3b-0913c92da0b2] Inference time: 13.78 seconds, CPU usage: 54.9%, CPU core utilization: [90.6, 49.1, 41.1, 38.6] +2025-08-18 23:16:31 - INFO - [1ebe2e39-e0ef-480c-bc3b-0913c92da0b2] Cleaned up temporary file: temp_videos/1ebe2e39-e0ef-480c-bc3b-0913c92da0b2.mp4 +2025-08-18 23:16:31 - INFO - [1ebe2e39-e0ef-480c-bc3b-0913c92da0b2] Cleaned up temporary frame directory: temp_videos/1ebe2e39-e0ef-480c-bc3b-0913c92da0b2 +2025-08-18 23:16:31 - INFO - [47b39fc3-3bcb-4adb-8a5c-48892b8d8282] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_020.mp4' +2025-08-18 23:16:31 - INFO - [47b39fc3-3bcb-4adb-8a5c-48892b8d8282] Video saved to temporary file: temp_videos/47b39fc3-3bcb-4adb-8a5c-48892b8d8282.mp4 +2025-08-18 23:16:31 - INFO - [47b39fc3-3bcb-4adb-8a5c-48892b8d8282] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:16:36 - INFO - [47b39fc3-3bcb-4adb-8a5c-48892b8d8282] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:16:36 - INFO - [47b39fc3-3bcb-4adb-8a5c-48892b8d8282] 30 frames saved to temp_videos/47b39fc3-3bcb-4adb-8a5c-48892b8d8282 +2025-08-18 23:16:37 - INFO - Prompt token length: 2276 +2025-08-18 23:16:55 - INFO - Tokens per second: 13.491381583278928, Peak GPU memory MB: 4498.375 +2025-08-18 23:16:55 - INFO - [47b39fc3-3bcb-4adb-8a5c-48892b8d8282] Inference time: 24.34 seconds, CPU usage: 43.2%, CPU core utilization: [40.0, 25.3, 79.9, 27.6] +2025-08-18 23:16:55 - INFO - [47b39fc3-3bcb-4adb-8a5c-48892b8d8282] Cleaned up temporary file: temp_videos/47b39fc3-3bcb-4adb-8a5c-48892b8d8282.mp4 +2025-08-18 23:16:55 - INFO - [47b39fc3-3bcb-4adb-8a5c-48892b8d8282] Cleaned up temporary frame directory: temp_videos/47b39fc3-3bcb-4adb-8a5c-48892b8d8282 +2025-08-18 23:16:55 - INFO - [5a18f5db-72b7-4032-8c8d-8b4cca9f8e0c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_021.mp4' +2025-08-18 23:16:55 - INFO - [5a18f5db-72b7-4032-8c8d-8b4cca9f8e0c] Video saved to temporary file: temp_videos/5a18f5db-72b7-4032-8c8d-8b4cca9f8e0c.mp4 +2025-08-18 23:16:55 - INFO - [5a18f5db-72b7-4032-8c8d-8b4cca9f8e0c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:17:01 - INFO - [5a18f5db-72b7-4032-8c8d-8b4cca9f8e0c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:17:01 - INFO - [5a18f5db-72b7-4032-8c8d-8b4cca9f8e0c] 30 frames saved to temp_videos/5a18f5db-72b7-4032-8c8d-8b4cca9f8e0c +2025-08-18 23:17:01 - INFO - Prompt token length: 2276 +2025-08-18 23:17:09 - INFO - Tokens per second: 10.918759797926471, Peak GPU memory MB: 4498.375 +2025-08-18 23:17:09 - INFO - [5a18f5db-72b7-4032-8c8d-8b4cca9f8e0c] Inference time: 13.62 seconds, CPU usage: 53.1%, CPU core utilization: [84.7, 43.6, 46.6, 37.5] +2025-08-18 23:17:09 - INFO - [5a18f5db-72b7-4032-8c8d-8b4cca9f8e0c] Cleaned up temporary file: temp_videos/5a18f5db-72b7-4032-8c8d-8b4cca9f8e0c.mp4 +2025-08-18 23:17:09 - INFO - [5a18f5db-72b7-4032-8c8d-8b4cca9f8e0c] Cleaned up temporary frame directory: temp_videos/5a18f5db-72b7-4032-8c8d-8b4cca9f8e0c +2025-08-18 23:17:09 - INFO - [e867080e-8bdb-4121-950e-dfc53806cefa] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_022.mp4' +2025-08-18 23:17:09 - INFO - [e867080e-8bdb-4121-950e-dfc53806cefa] Video saved to temporary file: temp_videos/e867080e-8bdb-4121-950e-dfc53806cefa.mp4 +2025-08-18 23:17:09 - INFO - [e867080e-8bdb-4121-950e-dfc53806cefa] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:17:14 - INFO - [e867080e-8bdb-4121-950e-dfc53806cefa] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:17:14 - INFO - [e867080e-8bdb-4121-950e-dfc53806cefa] 30 frames saved to temp_videos/e867080e-8bdb-4121-950e-dfc53806cefa +2025-08-18 23:17:15 - INFO - Prompt token length: 2276 +2025-08-18 23:17:22 - INFO - Tokens per second: 10.866962165814611, Peak GPU memory MB: 4498.375 +2025-08-18 23:17:22 - INFO - [e867080e-8bdb-4121-950e-dfc53806cefa] Inference time: 12.84 seconds, CPU usage: 54.1%, CPU core utilization: [52.7, 52.2, 63.9, 47.9] +2025-08-18 23:17:22 - INFO - [e867080e-8bdb-4121-950e-dfc53806cefa] Cleaned up temporary file: temp_videos/e867080e-8bdb-4121-950e-dfc53806cefa.mp4 +2025-08-18 23:17:22 - INFO - [e867080e-8bdb-4121-950e-dfc53806cefa] Cleaned up temporary frame directory: temp_videos/e867080e-8bdb-4121-950e-dfc53806cefa +2025-08-18 23:17:22 - INFO - [a33cdf5e-d2a0-4fbc-b02d-2cf19938ef2b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_023.mp4' +2025-08-18 23:17:22 - INFO - [a33cdf5e-d2a0-4fbc-b02d-2cf19938ef2b] Video saved to temporary file: temp_videos/a33cdf5e-d2a0-4fbc-b02d-2cf19938ef2b.mp4 +2025-08-18 23:17:22 - INFO - [a33cdf5e-d2a0-4fbc-b02d-2cf19938ef2b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:17:27 - INFO - [a33cdf5e-d2a0-4fbc-b02d-2cf19938ef2b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:17:27 - INFO - [a33cdf5e-d2a0-4fbc-b02d-2cf19938ef2b] 30 frames saved to temp_videos/a33cdf5e-d2a0-4fbc-b02d-2cf19938ef2b +2025-08-18 23:17:27 - INFO - Prompt token length: 2276 +2025-08-18 23:17:38 - INFO - Tokens per second: 12.056567900835331, Peak GPU memory MB: 4498.375 +2025-08-18 23:17:38 - INFO - [a33cdf5e-d2a0-4fbc-b02d-2cf19938ef2b] Inference time: 16.07 seconds, CPU usage: 48.6%, CPU core utilization: [50.8, 76.4, 33.9, 33.1] +2025-08-18 23:17:38 - INFO - [a33cdf5e-d2a0-4fbc-b02d-2cf19938ef2b] Cleaned up temporary file: temp_videos/a33cdf5e-d2a0-4fbc-b02d-2cf19938ef2b.mp4 +2025-08-18 23:17:38 - INFO - [a33cdf5e-d2a0-4fbc-b02d-2cf19938ef2b] Cleaned up temporary frame directory: temp_videos/a33cdf5e-d2a0-4fbc-b02d-2cf19938ef2b +2025-08-18 23:17:38 - INFO - [2c7eb728-fd3b-4979-abff-ded2bcfc9498] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_024.mp4' +2025-08-18 23:17:38 - INFO - [2c7eb728-fd3b-4979-abff-ded2bcfc9498] Video saved to temporary file: temp_videos/2c7eb728-fd3b-4979-abff-ded2bcfc9498.mp4 +2025-08-18 23:17:38 - INFO - [2c7eb728-fd3b-4979-abff-ded2bcfc9498] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:17:44 - INFO - [2c7eb728-fd3b-4979-abff-ded2bcfc9498] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:17:44 - INFO - [2c7eb728-fd3b-4979-abff-ded2bcfc9498] 30 frames saved to temp_videos/2c7eb728-fd3b-4979-abff-ded2bcfc9498 +2025-08-18 23:17:44 - INFO - Prompt token length: 2276 +2025-08-18 23:18:03 - INFO - Tokens per second: 13.381210503864766, Peak GPU memory MB: 4498.375 +2025-08-18 23:18:03 - INFO - [2c7eb728-fd3b-4979-abff-ded2bcfc9498] Inference time: 24.63 seconds, CPU usage: 43.7%, CPU core utilization: [27.6, 45.4, 28.2, 73.7] +2025-08-18 23:18:03 - INFO - [2c7eb728-fd3b-4979-abff-ded2bcfc9498] Cleaned up temporary file: temp_videos/2c7eb728-fd3b-4979-abff-ded2bcfc9498.mp4 +2025-08-18 23:18:03 - INFO - [2c7eb728-fd3b-4979-abff-ded2bcfc9498] Cleaned up temporary frame directory: temp_videos/2c7eb728-fd3b-4979-abff-ded2bcfc9498 +2025-08-18 23:18:03 - INFO - [cfd937d8-7e9e-4e1f-b7a8-12dda7c9a0fc] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_025.mp4' +2025-08-18 23:18:03 - INFO - [cfd937d8-7e9e-4e1f-b7a8-12dda7c9a0fc] Video saved to temporary file: temp_videos/cfd937d8-7e9e-4e1f-b7a8-12dda7c9a0fc.mp4 +2025-08-18 23:18:03 - INFO - [cfd937d8-7e9e-4e1f-b7a8-12dda7c9a0fc] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:18:08 - INFO - [cfd937d8-7e9e-4e1f-b7a8-12dda7c9a0fc] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:18:09 - INFO - [cfd937d8-7e9e-4e1f-b7a8-12dda7c9a0fc] 30 frames saved to temp_videos/cfd937d8-7e9e-4e1f-b7a8-12dda7c9a0fc +2025-08-18 23:18:09 - INFO - Prompt token length: 2276 +2025-08-18 23:18:18 - INFO - Tokens per second: 11.34225179666489, Peak GPU memory MB: 4498.375 +2025-08-18 23:18:18 - INFO - [cfd937d8-7e9e-4e1f-b7a8-12dda7c9a0fc] Inference time: 14.62 seconds, CPU usage: 54.3%, CPU core utilization: [39.2, 45.2, 91.6, 41.1] +2025-08-18 23:18:18 - INFO - [cfd937d8-7e9e-4e1f-b7a8-12dda7c9a0fc] Cleaned up temporary file: temp_videos/cfd937d8-7e9e-4e1f-b7a8-12dda7c9a0fc.mp4 +2025-08-18 23:18:18 - INFO - [cfd937d8-7e9e-4e1f-b7a8-12dda7c9a0fc] Cleaned up temporary frame directory: temp_videos/cfd937d8-7e9e-4e1f-b7a8-12dda7c9a0fc +2025-08-18 23:18:18 - INFO - [81527b03-bb00-4863-acd0-a08fdf851919] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_026.mp4' +2025-08-18 23:18:18 - INFO - [81527b03-bb00-4863-acd0-a08fdf851919] Video saved to temporary file: temp_videos/81527b03-bb00-4863-acd0-a08fdf851919.mp4 +2025-08-18 23:18:18 - INFO - [81527b03-bb00-4863-acd0-a08fdf851919] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:18:23 - INFO - [81527b03-bb00-4863-acd0-a08fdf851919] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:18:23 - INFO - [81527b03-bb00-4863-acd0-a08fdf851919] 30 frames saved to temp_videos/81527b03-bb00-4863-acd0-a08fdf851919 +2025-08-18 23:18:24 - INFO - Prompt token length: 2276 +2025-08-18 23:18:30 - INFO - Tokens per second: 10.428299716256006, Peak GPU memory MB: 4498.375 +2025-08-18 23:18:30 - INFO - [81527b03-bb00-4863-acd0-a08fdf851919] Inference time: 12.61 seconds, CPU usage: 57.1%, CPU core utilization: [71.9, 46.6, 61.8, 48.1] +2025-08-18 23:18:30 - INFO - [81527b03-bb00-4863-acd0-a08fdf851919] Cleaned up temporary file: temp_videos/81527b03-bb00-4863-acd0-a08fdf851919.mp4 +2025-08-18 23:18:30 - INFO - [81527b03-bb00-4863-acd0-a08fdf851919] Cleaned up temporary frame directory: temp_videos/81527b03-bb00-4863-acd0-a08fdf851919 +2025-08-18 23:18:30 - INFO - [e5e0067a-db6e-455c-a1f8-c473cdcecc0f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_027.mp4' +2025-08-18 23:18:30 - INFO - [e5e0067a-db6e-455c-a1f8-c473cdcecc0f] Video saved to temporary file: temp_videos/e5e0067a-db6e-455c-a1f8-c473cdcecc0f.mp4 +2025-08-18 23:18:30 - INFO - [e5e0067a-db6e-455c-a1f8-c473cdcecc0f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:18:36 - INFO - [e5e0067a-db6e-455c-a1f8-c473cdcecc0f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:18:36 - INFO - [e5e0067a-db6e-455c-a1f8-c473cdcecc0f] 30 frames saved to temp_videos/e5e0067a-db6e-455c-a1f8-c473cdcecc0f +2025-08-18 23:18:36 - INFO - Prompt token length: 2276 +2025-08-18 23:18:44 - INFO - Tokens per second: 10.689614725622825, Peak GPU memory MB: 4498.375 +2025-08-18 23:18:44 - INFO - [e5e0067a-db6e-455c-a1f8-c473cdcecc0f] Inference time: 13.57 seconds, CPU usage: 56.5%, CPU core utilization: [41.5, 47.4, 49.0, 88.1] +2025-08-18 23:18:44 - INFO - [e5e0067a-db6e-455c-a1f8-c473cdcecc0f] Cleaned up temporary file: temp_videos/e5e0067a-db6e-455c-a1f8-c473cdcecc0f.mp4 +2025-08-18 23:18:44 - INFO - [e5e0067a-db6e-455c-a1f8-c473cdcecc0f] Cleaned up temporary frame directory: temp_videos/e5e0067a-db6e-455c-a1f8-c473cdcecc0f +2025-08-18 23:18:44 - INFO - [c8960966-5214-4ce4-ad51-a448df43cdd2] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_028.mp4' +2025-08-18 23:18:44 - INFO - [c8960966-5214-4ce4-ad51-a448df43cdd2] Video saved to temporary file: temp_videos/c8960966-5214-4ce4-ad51-a448df43cdd2.mp4 +2025-08-18 23:18:44 - INFO - [c8960966-5214-4ce4-ad51-a448df43cdd2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:18:50 - INFO - [c8960966-5214-4ce4-ad51-a448df43cdd2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:18:50 - INFO - [c8960966-5214-4ce4-ad51-a448df43cdd2] 30 frames saved to temp_videos/c8960966-5214-4ce4-ad51-a448df43cdd2 +2025-08-18 23:18:50 - INFO - Prompt token length: 2276 +2025-08-18 23:19:09 - INFO - Tokens per second: 13.483402737231337, Peak GPU memory MB: 4498.375 +2025-08-18 23:19:09 - INFO - [c8960966-5214-4ce4-ad51-a448df43cdd2] Inference time: 24.66 seconds, CPU usage: 43.9%, CPU core utilization: [63.8, 27.2, 57.7, 27.0] +2025-08-18 23:19:09 - INFO - [c8960966-5214-4ce4-ad51-a448df43cdd2] Cleaned up temporary file: temp_videos/c8960966-5214-4ce4-ad51-a448df43cdd2.mp4 +2025-08-18 23:19:09 - INFO - [c8960966-5214-4ce4-ad51-a448df43cdd2] Cleaned up temporary frame directory: temp_videos/c8960966-5214-4ce4-ad51-a448df43cdd2 +2025-08-18 23:19:09 - INFO - [5e72c95d-a592-4347-99af-185f49d942cb] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_029.mp4' +2025-08-18 23:19:09 - INFO - [5e72c95d-a592-4347-99af-185f49d942cb] Video saved to temporary file: temp_videos/5e72c95d-a592-4347-99af-185f49d942cb.mp4 +2025-08-18 23:19:09 - INFO - [5e72c95d-a592-4347-99af-185f49d942cb] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:19:14 - INFO - [5e72c95d-a592-4347-99af-185f49d942cb] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:19:14 - INFO - [5e72c95d-a592-4347-99af-185f49d942cb] 30 frames saved to temp_videos/5e72c95d-a592-4347-99af-185f49d942cb +2025-08-18 23:19:15 - INFO - Prompt token length: 2276 +2025-08-18 23:19:21 - INFO - Tokens per second: 10.14359586210809, Peak GPU memory MB: 4498.375 +2025-08-18 23:19:21 - INFO - [5e72c95d-a592-4347-99af-185f49d942cb] Inference time: 12.20 seconds, CPU usage: 57.1%, CPU core utilization: [64.5, 58.1, 44.6, 61.3] +2025-08-18 23:19:21 - INFO - [5e72c95d-a592-4347-99af-185f49d942cb] Cleaned up temporary file: temp_videos/5e72c95d-a592-4347-99af-185f49d942cb.mp4 +2025-08-18 23:19:21 - INFO - [5e72c95d-a592-4347-99af-185f49d942cb] Cleaned up temporary frame directory: temp_videos/5e72c95d-a592-4347-99af-185f49d942cb +2025-08-18 23:19:21 - INFO - [95cedc5c-e22d-4383-920b-43c033076cf1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_030.mp4' +2025-08-18 23:19:21 - INFO - [95cedc5c-e22d-4383-920b-43c033076cf1] Video saved to temporary file: temp_videos/95cedc5c-e22d-4383-920b-43c033076cf1.mp4 +2025-08-18 23:19:21 - INFO - [95cedc5c-e22d-4383-920b-43c033076cf1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:19:27 - INFO - [95cedc5c-e22d-4383-920b-43c033076cf1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:19:28 - INFO - [95cedc5c-e22d-4383-920b-43c033076cf1] 30 frames saved to temp_videos/95cedc5c-e22d-4383-920b-43c033076cf1 +2025-08-18 23:19:28 - INFO - Prompt token length: 2276 +2025-08-18 23:19:36 - INFO - Tokens per second: 10.746648641485438, Peak GPU memory MB: 4498.375 +2025-08-18 23:19:36 - INFO - [95cedc5c-e22d-4383-920b-43c033076cf1] Inference time: 14.55 seconds, CPU usage: 58.5%, CPU core utilization: [52.2, 82.8, 47.5, 51.5] +2025-08-18 23:19:36 - INFO - [95cedc5c-e22d-4383-920b-43c033076cf1] Cleaned up temporary file: temp_videos/95cedc5c-e22d-4383-920b-43c033076cf1.mp4 +2025-08-18 23:19:36 - INFO - [95cedc5c-e22d-4383-920b-43c033076cf1] Cleaned up temporary frame directory: temp_videos/95cedc5c-e22d-4383-920b-43c033076cf1 +2025-08-18 23:19:36 - INFO - [26b3ebdf-7333-440c-ab40-1395e2f8b4ac] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_031.mp4' +2025-08-18 23:19:36 - INFO - [26b3ebdf-7333-440c-ab40-1395e2f8b4ac] Video saved to temporary file: temp_videos/26b3ebdf-7333-440c-ab40-1395e2f8b4ac.mp4 +2025-08-18 23:19:36 - INFO - [26b3ebdf-7333-440c-ab40-1395e2f8b4ac] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:19:42 - INFO - [26b3ebdf-7333-440c-ab40-1395e2f8b4ac] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:19:42 - INFO - [26b3ebdf-7333-440c-ab40-1395e2f8b4ac] 30 frames saved to temp_videos/26b3ebdf-7333-440c-ab40-1395e2f8b4ac +2025-08-18 23:19:42 - INFO - Prompt token length: 2276 +2025-08-18 23:19:49 - INFO - Tokens per second: 10.552761688015028, Peak GPU memory MB: 4498.375 +2025-08-18 23:19:49 - INFO - [26b3ebdf-7333-440c-ab40-1395e2f8b4ac] Inference time: 13.73 seconds, CPU usage: 57.5%, CPU core utilization: [85.1, 49.1, 51.1, 44.8] +2025-08-18 23:19:49 - INFO - [26b3ebdf-7333-440c-ab40-1395e2f8b4ac] Cleaned up temporary file: temp_videos/26b3ebdf-7333-440c-ab40-1395e2f8b4ac.mp4 +2025-08-18 23:19:49 - INFO - [26b3ebdf-7333-440c-ab40-1395e2f8b4ac] Cleaned up temporary frame directory: temp_videos/26b3ebdf-7333-440c-ab40-1395e2f8b4ac +2025-08-18 23:19:49 - INFO - [e6be423e-a8f1-4c32-af6c-53d343f2f396] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_032.mp4' +2025-08-18 23:19:49 - INFO - [e6be423e-a8f1-4c32-af6c-53d343f2f396] Video saved to temporary file: temp_videos/e6be423e-a8f1-4c32-af6c-53d343f2f396.mp4 +2025-08-18 23:19:49 - INFO - [e6be423e-a8f1-4c32-af6c-53d343f2f396] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:19:55 - INFO - [e6be423e-a8f1-4c32-af6c-53d343f2f396] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:19:55 - INFO - [e6be423e-a8f1-4c32-af6c-53d343f2f396] 30 frames saved to temp_videos/e6be423e-a8f1-4c32-af6c-53d343f2f396 +2025-08-18 23:19:56 - INFO - Prompt token length: 2276 +2025-08-18 23:20:14 - INFO - Tokens per second: 13.374680716381132, Peak GPU memory MB: 4498.375 +2025-08-18 23:20:14 - INFO - [e6be423e-a8f1-4c32-af6c-53d343f2f396] Inference time: 24.96 seconds, CPU usage: 44.3%, CPU core utilization: [25.3, 23.5, 32.3, 96.0] +2025-08-18 23:20:14 - INFO - [e6be423e-a8f1-4c32-af6c-53d343f2f396] Cleaned up temporary file: temp_videos/e6be423e-a8f1-4c32-af6c-53d343f2f396.mp4 +2025-08-18 23:20:14 - INFO - [e6be423e-a8f1-4c32-af6c-53d343f2f396] Cleaned up temporary frame directory: temp_videos/e6be423e-a8f1-4c32-af6c-53d343f2f396 +2025-08-18 23:20:14 - INFO - [2ac6b08b-e900-43c1-ae17-ff2ecc5892a4] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_033.mp4' +2025-08-18 23:20:14 - INFO - [2ac6b08b-e900-43c1-ae17-ff2ecc5892a4] Video saved to temporary file: temp_videos/2ac6b08b-e900-43c1-ae17-ff2ecc5892a4.mp4 +2025-08-18 23:20:14 - INFO - [2ac6b08b-e900-43c1-ae17-ff2ecc5892a4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:20:19 - INFO - [2ac6b08b-e900-43c1-ae17-ff2ecc5892a4] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:20:19 - INFO - [2ac6b08b-e900-43c1-ae17-ff2ecc5892a4] 30 frames saved to temp_videos/2ac6b08b-e900-43c1-ae17-ff2ecc5892a4 +2025-08-18 23:20:20 - INFO - Prompt token length: 2276 +2025-08-18 23:20:25 - INFO - Tokens per second: 8.750997375438747, Peak GPU memory MB: 4498.375 +2025-08-18 23:20:25 - INFO - [2ac6b08b-e900-43c1-ae17-ff2ecc5892a4] Inference time: 10.46 seconds, CPU usage: 59.6%, CPU core utilization: [47.0, 86.9, 54.7, 49.6] +2025-08-18 23:20:25 - INFO - [2ac6b08b-e900-43c1-ae17-ff2ecc5892a4] Cleaned up temporary file: temp_videos/2ac6b08b-e900-43c1-ae17-ff2ecc5892a4.mp4 +2025-08-18 23:20:25 - INFO - [2ac6b08b-e900-43c1-ae17-ff2ecc5892a4] Cleaned up temporary frame directory: temp_videos/2ac6b08b-e900-43c1-ae17-ff2ecc5892a4 +2025-08-18 23:20:25 - INFO - [b89a7d28-3953-4062-b0ab-66fded3cf1c3] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_034.mp4' +2025-08-18 23:20:25 - INFO - [b89a7d28-3953-4062-b0ab-66fded3cf1c3] Video saved to temporary file: temp_videos/b89a7d28-3953-4062-b0ab-66fded3cf1c3.mp4 +2025-08-18 23:20:25 - INFO - [b89a7d28-3953-4062-b0ab-66fded3cf1c3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:20:30 - INFO - [b89a7d28-3953-4062-b0ab-66fded3cf1c3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:20:30 - INFO - [b89a7d28-3953-4062-b0ab-66fded3cf1c3] 30 frames saved to temp_videos/b89a7d28-3953-4062-b0ab-66fded3cf1c3 +2025-08-18 23:20:31 - INFO - Prompt token length: 2276 +2025-08-18 23:20:40 - INFO - Tokens per second: 11.600796413969313, Peak GPU memory MB: 4498.375 +2025-08-18 23:20:40 - INFO - [b89a7d28-3953-4062-b0ab-66fded3cf1c3] Inference time: 14.61 seconds, CPU usage: 50.2%, CPU core utilization: [37.3, 59.9, 38.3, 65.4] +2025-08-18 23:20:40 - INFO - [b89a7d28-3953-4062-b0ab-66fded3cf1c3] Cleaned up temporary file: temp_videos/b89a7d28-3953-4062-b0ab-66fded3cf1c3.mp4 +2025-08-18 23:20:40 - INFO - [b89a7d28-3953-4062-b0ab-66fded3cf1c3] Cleaned up temporary frame directory: temp_videos/b89a7d28-3953-4062-b0ab-66fded3cf1c3 +2025-08-18 23:20:40 - INFO - [fa74f0b2-3f33-423b-a4f7-8608c214c4b7] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_035.mp4' +2025-08-18 23:20:40 - INFO - [fa74f0b2-3f33-423b-a4f7-8608c214c4b7] Video saved to temporary file: temp_videos/fa74f0b2-3f33-423b-a4f7-8608c214c4b7.mp4 +2025-08-18 23:20:40 - INFO - [fa74f0b2-3f33-423b-a4f7-8608c214c4b7] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:20:46 - INFO - [fa74f0b2-3f33-423b-a4f7-8608c214c4b7] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:20:46 - INFO - [fa74f0b2-3f33-423b-a4f7-8608c214c4b7] 30 frames saved to temp_videos/fa74f0b2-3f33-423b-a4f7-8608c214c4b7 +2025-08-18 23:20:46 - INFO - Prompt token length: 2276 +2025-08-18 23:21:05 - INFO - Tokens per second: 13.475444822957598, Peak GPU memory MB: 4498.375 +2025-08-18 23:21:05 - INFO - [fa74f0b2-3f33-423b-a4f7-8608c214c4b7] Inference time: 25.26 seconds, CPU usage: 45.0%, CPU core utilization: [57.4, 30.4, 64.5, 27.5] +2025-08-18 23:21:05 - INFO - [fa74f0b2-3f33-423b-a4f7-8608c214c4b7] Cleaned up temporary file: temp_videos/fa74f0b2-3f33-423b-a4f7-8608c214c4b7.mp4 +2025-08-18 23:21:05 - INFO - [fa74f0b2-3f33-423b-a4f7-8608c214c4b7] Cleaned up temporary frame directory: temp_videos/fa74f0b2-3f33-423b-a4f7-8608c214c4b7 +2025-08-18 23:21:05 - INFO - [ece00724-8ed0-4af0-95be-793a15dc9c21] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_036.mp4' +2025-08-18 23:21:05 - INFO - [ece00724-8ed0-4af0-95be-793a15dc9c21] Video saved to temporary file: temp_videos/ece00724-8ed0-4af0-95be-793a15dc9c21.mp4 +2025-08-18 23:21:05 - INFO - [ece00724-8ed0-4af0-95be-793a15dc9c21] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:21:10 - INFO - [ece00724-8ed0-4af0-95be-793a15dc9c21] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:21:10 - INFO - [ece00724-8ed0-4af0-95be-793a15dc9c21] 30 frames saved to temp_videos/ece00724-8ed0-4af0-95be-793a15dc9c21 +2025-08-18 23:21:11 - INFO - Prompt token length: 2276 +2025-08-18 23:21:17 - INFO - Tokens per second: 10.181369513872784, Peak GPU memory MB: 4498.375 +2025-08-18 23:21:17 - INFO - [ece00724-8ed0-4af0-95be-793a15dc9c21] Inference time: 11.77 seconds, CPU usage: 56.8%, CPU core utilization: [41.9, 41.8, 92.1, 50.9] +2025-08-18 23:21:17 - INFO - [ece00724-8ed0-4af0-95be-793a15dc9c21] Cleaned up temporary file: temp_videos/ece00724-8ed0-4af0-95be-793a15dc9c21.mp4 +2025-08-18 23:21:17 - INFO - [ece00724-8ed0-4af0-95be-793a15dc9c21] Cleaned up temporary frame directory: temp_videos/ece00724-8ed0-4af0-95be-793a15dc9c21 +2025-08-18 23:21:17 - INFO - [6c0391c7-6b90-48b0-aa61-208a38f34896] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_037.mp4' +2025-08-18 23:21:17 - INFO - [6c0391c7-6b90-48b0-aa61-208a38f34896] Video saved to temporary file: temp_videos/6c0391c7-6b90-48b0-aa61-208a38f34896.mp4 +2025-08-18 23:21:17 - INFO - [6c0391c7-6b90-48b0-aa61-208a38f34896] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:21:22 - INFO - [6c0391c7-6b90-48b0-aa61-208a38f34896] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:21:22 - INFO - [6c0391c7-6b90-48b0-aa61-208a38f34896] 30 frames saved to temp_videos/6c0391c7-6b90-48b0-aa61-208a38f34896 +2025-08-18 23:21:22 - INFO - Prompt token length: 2276 +2025-08-18 23:21:41 - INFO - Tokens per second: 13.31561753455536, Peak GPU memory MB: 4498.375 +2025-08-18 23:21:41 - INFO - [6c0391c7-6b90-48b0-aa61-208a38f34896] Inference time: 24.41 seconds, CPU usage: 42.8%, CPU core utilization: [24.5, 26.3, 27.6, 92.7] +2025-08-18 23:21:41 - INFO - [6c0391c7-6b90-48b0-aa61-208a38f34896] Cleaned up temporary file: temp_videos/6c0391c7-6b90-48b0-aa61-208a38f34896.mp4 +2025-08-18 23:21:41 - INFO - [6c0391c7-6b90-48b0-aa61-208a38f34896] Cleaned up temporary frame directory: temp_videos/6c0391c7-6b90-48b0-aa61-208a38f34896 +2025-08-18 23:21:41 - INFO - [5c94a1b3-c973-49ff-ba0c-0064649ef96a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_038.mp4' +2025-08-18 23:21:41 - INFO - [5c94a1b3-c973-49ff-ba0c-0064649ef96a] Video saved to temporary file: temp_videos/5c94a1b3-c973-49ff-ba0c-0064649ef96a.mp4 +2025-08-18 23:21:41 - INFO - [5c94a1b3-c973-49ff-ba0c-0064649ef96a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:21:46 - INFO - [5c94a1b3-c973-49ff-ba0c-0064649ef96a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:21:46 - INFO - [5c94a1b3-c973-49ff-ba0c-0064649ef96a] 30 frames saved to temp_videos/5c94a1b3-c973-49ff-ba0c-0064649ef96a +2025-08-18 23:21:46 - INFO - Prompt token length: 2276 +2025-08-18 23:22:05 - INFO - Tokens per second: 13.391948929733484, Peak GPU memory MB: 4498.375 +2025-08-18 23:22:05 - INFO - [5c94a1b3-c973-49ff-ba0c-0064649ef96a] Inference time: 23.62 seconds, CPU usage: 41.4%, CPU core utilization: [26.0, 21.7, 23.0, 95.0] +2025-08-18 23:22:05 - INFO - [5c94a1b3-c973-49ff-ba0c-0064649ef96a] Cleaned up temporary file: temp_videos/5c94a1b3-c973-49ff-ba0c-0064649ef96a.mp4 +2025-08-18 23:22:05 - INFO - [5c94a1b3-c973-49ff-ba0c-0064649ef96a] Cleaned up temporary frame directory: temp_videos/5c94a1b3-c973-49ff-ba0c-0064649ef96a +2025-08-18 23:22:05 - INFO - [5f4b1eea-01e8-4e1b-aa03-89b029ebcd0f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_039.mp4' +2025-08-18 23:22:05 - INFO - [5f4b1eea-01e8-4e1b-aa03-89b029ebcd0f] Video saved to temporary file: temp_videos/5f4b1eea-01e8-4e1b-aa03-89b029ebcd0f.mp4 +2025-08-18 23:22:05 - INFO - [5f4b1eea-01e8-4e1b-aa03-89b029ebcd0f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:22:11 - INFO - [5f4b1eea-01e8-4e1b-aa03-89b029ebcd0f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:22:11 - INFO - [5f4b1eea-01e8-4e1b-aa03-89b029ebcd0f] 30 frames saved to temp_videos/5f4b1eea-01e8-4e1b-aa03-89b029ebcd0f +2025-08-18 23:22:11 - INFO - Prompt token length: 2276 +2025-08-18 23:22:30 - INFO - Tokens per second: 13.257980068291804, Peak GPU memory MB: 4498.375 +2025-08-18 23:22:30 - INFO - [5f4b1eea-01e8-4e1b-aa03-89b029ebcd0f] Inference time: 24.98 seconds, CPU usage: 44.0%, CPU core utilization: [30.0, 85.9, 27.0, 33.1] +2025-08-18 23:22:30 - INFO - [5f4b1eea-01e8-4e1b-aa03-89b029ebcd0f] Cleaned up temporary file: temp_videos/5f4b1eea-01e8-4e1b-aa03-89b029ebcd0f.mp4 +2025-08-18 23:22:30 - INFO - [5f4b1eea-01e8-4e1b-aa03-89b029ebcd0f] Cleaned up temporary frame directory: temp_videos/5f4b1eea-01e8-4e1b-aa03-89b029ebcd0f +2025-08-18 23:22:30 - INFO - [160706cc-ca43-47e3-888a-b3980e406009] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_040.mp4' +2025-08-18 23:22:30 - INFO - [160706cc-ca43-47e3-888a-b3980e406009] Video saved to temporary file: temp_videos/160706cc-ca43-47e3-888a-b3980e406009.mp4 +2025-08-18 23:22:30 - INFO - [160706cc-ca43-47e3-888a-b3980e406009] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:22:36 - INFO - [160706cc-ca43-47e3-888a-b3980e406009] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:22:36 - INFO - [160706cc-ca43-47e3-888a-b3980e406009] 30 frames saved to temp_videos/160706cc-ca43-47e3-888a-b3980e406009 +2025-08-18 23:22:37 - INFO - Prompt token length: 2276 +2025-08-18 23:22:55 - INFO - Tokens per second: 13.47823802494779, Peak GPU memory MB: 4498.375 +2025-08-18 23:22:55 - INFO - [160706cc-ca43-47e3-888a-b3980e406009] Inference time: 25.22 seconds, CPU usage: 45.0%, CPU core utilization: [59.7, 42.4, 48.5, 29.5] +2025-08-18 23:22:55 - INFO - [160706cc-ca43-47e3-888a-b3980e406009] Cleaned up temporary file: temp_videos/160706cc-ca43-47e3-888a-b3980e406009.mp4 +2025-08-18 23:22:55 - INFO - [160706cc-ca43-47e3-888a-b3980e406009] Cleaned up temporary frame directory: temp_videos/160706cc-ca43-47e3-888a-b3980e406009 +2025-08-18 23:22:55 - INFO - [b72c2383-0bd4-47c1-8045-c4ce316a5528] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_041.mp4' +2025-08-18 23:22:55 - INFO - [b72c2383-0bd4-47c1-8045-c4ce316a5528] Video saved to temporary file: temp_videos/b72c2383-0bd4-47c1-8045-c4ce316a5528.mp4 +2025-08-18 23:22:55 - INFO - [b72c2383-0bd4-47c1-8045-c4ce316a5528] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:23:00 - INFO - [b72c2383-0bd4-47c1-8045-c4ce316a5528] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:23:00 - INFO - [b72c2383-0bd4-47c1-8045-c4ce316a5528] 30 frames saved to temp_videos/b72c2383-0bd4-47c1-8045-c4ce316a5528 +2025-08-18 23:23:01 - INFO - Prompt token length: 2276 +2025-08-18 23:23:06 - INFO - Tokens per second: 9.739024302972952, Peak GPU memory MB: 4498.375 +2025-08-18 23:23:06 - INFO - [b72c2383-0bd4-47c1-8045-c4ce316a5528] Inference time: 10.98 seconds, CPU usage: 57.1%, CPU core utilization: [41.5, 45.6, 92.8, 48.6] +2025-08-18 23:23:06 - INFO - [b72c2383-0bd4-47c1-8045-c4ce316a5528] Cleaned up temporary file: temp_videos/b72c2383-0bd4-47c1-8045-c4ce316a5528.mp4 +2025-08-18 23:23:06 - INFO - [b72c2383-0bd4-47c1-8045-c4ce316a5528] Cleaned up temporary frame directory: temp_videos/b72c2383-0bd4-47c1-8045-c4ce316a5528 +2025-08-18 23:23:06 - INFO - [21b335d9-e432-4a1e-b7ed-cd619a6f97c5] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_042.mp4' +2025-08-18 23:23:06 - INFO - [21b335d9-e432-4a1e-b7ed-cd619a6f97c5] Video saved to temporary file: temp_videos/21b335d9-e432-4a1e-b7ed-cd619a6f97c5.mp4 +2025-08-18 23:23:06 - INFO - [21b335d9-e432-4a1e-b7ed-cd619a6f97c5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:23:12 - INFO - [21b335d9-e432-4a1e-b7ed-cd619a6f97c5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:23:12 - INFO - [21b335d9-e432-4a1e-b7ed-cd619a6f97c5] 30 frames saved to temp_videos/21b335d9-e432-4a1e-b7ed-cd619a6f97c5 +2025-08-18 23:23:13 - INFO - Prompt token length: 2276 +2025-08-18 23:23:31 - INFO - Tokens per second: 13.462089064329856, Peak GPU memory MB: 4498.375 +2025-08-18 23:23:31 - INFO - [21b335d9-e432-4a1e-b7ed-cd619a6f97c5] Inference time: 24.58 seconds, CPU usage: 43.8%, CPU core utilization: [36.6, 25.6, 82.1, 30.8] +2025-08-18 23:23:31 - INFO - [21b335d9-e432-4a1e-b7ed-cd619a6f97c5] Cleaned up temporary file: temp_videos/21b335d9-e432-4a1e-b7ed-cd619a6f97c5.mp4 +2025-08-18 23:23:31 - INFO - [21b335d9-e432-4a1e-b7ed-cd619a6f97c5] Cleaned up temporary frame directory: temp_videos/21b335d9-e432-4a1e-b7ed-cd619a6f97c5 +2025-08-18 23:23:31 - INFO - [cf57af90-5f0c-4981-a92e-ac1f84763e53] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_043.mp4' +2025-08-18 23:23:31 - INFO - [cf57af90-5f0c-4981-a92e-ac1f84763e53] Video saved to temporary file: temp_videos/cf57af90-5f0c-4981-a92e-ac1f84763e53.mp4 +2025-08-18 23:23:31 - INFO - [cf57af90-5f0c-4981-a92e-ac1f84763e53] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:23:37 - INFO - [cf57af90-5f0c-4981-a92e-ac1f84763e53] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:23:38 - INFO - [cf57af90-5f0c-4981-a92e-ac1f84763e53] 30 frames saved to temp_videos/cf57af90-5f0c-4981-a92e-ac1f84763e53 +2025-08-18 23:23:38 - INFO - Prompt token length: 2276 +2025-08-18 23:23:44 - INFO - Tokens per second: 10.093422721507519, Peak GPU memory MB: 4498.375 +2025-08-18 23:23:44 - INFO - [cf57af90-5f0c-4981-a92e-ac1f84763e53] Inference time: 13.12 seconds, CPU usage: 59.8%, CPU core utilization: [88.6, 48.6, 49.1, 52.8] +2025-08-18 23:23:44 - INFO - [cf57af90-5f0c-4981-a92e-ac1f84763e53] Cleaned up temporary file: temp_videos/cf57af90-5f0c-4981-a92e-ac1f84763e53.mp4 +2025-08-18 23:23:44 - INFO - [cf57af90-5f0c-4981-a92e-ac1f84763e53] Cleaned up temporary frame directory: temp_videos/cf57af90-5f0c-4981-a92e-ac1f84763e53 +2025-08-18 23:23:44 - INFO - [65e78a63-a0ee-429e-b30c-b1a284f71bcb] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_044.mp4' +2025-08-18 23:23:44 - INFO - [65e78a63-a0ee-429e-b30c-b1a284f71bcb] Video saved to temporary file: temp_videos/65e78a63-a0ee-429e-b30c-b1a284f71bcb.mp4 +2025-08-18 23:23:44 - INFO - [65e78a63-a0ee-429e-b30c-b1a284f71bcb] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:23:50 - INFO - [65e78a63-a0ee-429e-b30c-b1a284f71bcb] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:23:51 - INFO - [65e78a63-a0ee-429e-b30c-b1a284f71bcb] 30 frames saved to temp_videos/65e78a63-a0ee-429e-b30c-b1a284f71bcb +2025-08-18 23:23:51 - INFO - Prompt token length: 2276 +2025-08-18 23:23:58 - INFO - Tokens per second: 10.718624704506539, Peak GPU memory MB: 4498.375 +2025-08-18 23:23:58 - INFO - [65e78a63-a0ee-429e-b30c-b1a284f71bcb] Inference time: 14.04 seconds, CPU usage: 57.9%, CPU core utilization: [49.1, 82.8, 46.4, 53.1] +2025-08-18 23:23:58 - INFO - [65e78a63-a0ee-429e-b30c-b1a284f71bcb] Cleaned up temporary file: temp_videos/65e78a63-a0ee-429e-b30c-b1a284f71bcb.mp4 +2025-08-18 23:23:58 - INFO - [65e78a63-a0ee-429e-b30c-b1a284f71bcb] Cleaned up temporary frame directory: temp_videos/65e78a63-a0ee-429e-b30c-b1a284f71bcb +2025-08-18 23:23:58 - INFO - [132ebe35-6429-4e7f-a194-8641fbdb9516] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_045.mp4' +2025-08-18 23:23:58 - INFO - [132ebe35-6429-4e7f-a194-8641fbdb9516] Video saved to temporary file: temp_videos/132ebe35-6429-4e7f-a194-8641fbdb9516.mp4 +2025-08-18 23:23:58 - INFO - [132ebe35-6429-4e7f-a194-8641fbdb9516] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:24:03 - INFO - [132ebe35-6429-4e7f-a194-8641fbdb9516] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:24:03 - INFO - [132ebe35-6429-4e7f-a194-8641fbdb9516] 30 frames saved to temp_videos/132ebe35-6429-4e7f-a194-8641fbdb9516 +2025-08-18 23:24:04 - INFO - Prompt token length: 2276 +2025-08-18 23:24:12 - INFO - Tokens per second: 11.17376621816953, Peak GPU memory MB: 4498.375 +2025-08-18 23:24:12 - INFO - [132ebe35-6429-4e7f-a194-8641fbdb9516] Inference time: 13.78 seconds, CPU usage: 51.4%, CPU core utilization: [35.8, 78.6, 39.3, 52.0] +2025-08-18 23:24:12 - INFO - [132ebe35-6429-4e7f-a194-8641fbdb9516] Cleaned up temporary file: temp_videos/132ebe35-6429-4e7f-a194-8641fbdb9516.mp4 +2025-08-18 23:24:12 - INFO - [132ebe35-6429-4e7f-a194-8641fbdb9516] Cleaned up temporary frame directory: temp_videos/132ebe35-6429-4e7f-a194-8641fbdb9516 +2025-08-18 23:24:12 - INFO - [966cd606-8ad8-4bc5-bb51-1c38cb64d84c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_046.mp4' +2025-08-18 23:24:12 - INFO - [966cd606-8ad8-4bc5-bb51-1c38cb64d84c] Video saved to temporary file: temp_videos/966cd606-8ad8-4bc5-bb51-1c38cb64d84c.mp4 +2025-08-18 23:24:12 - INFO - [966cd606-8ad8-4bc5-bb51-1c38cb64d84c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:24:17 - INFO - [966cd606-8ad8-4bc5-bb51-1c38cb64d84c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:24:17 - INFO - [966cd606-8ad8-4bc5-bb51-1c38cb64d84c] 30 frames saved to temp_videos/966cd606-8ad8-4bc5-bb51-1c38cb64d84c +2025-08-18 23:24:18 - INFO - Prompt token length: 2276 +2025-08-18 23:24:22 - INFO - Tokens per second: 8.889578335077823, Peak GPU memory MB: 4498.375 +2025-08-18 23:24:22 - INFO - [966cd606-8ad8-4bc5-bb51-1c38cb64d84c] Inference time: 10.13 seconds, CPU usage: 57.5%, CPU core utilization: [86.8, 47.6, 52.0, 43.7] +2025-08-18 23:24:22 - INFO - [966cd606-8ad8-4bc5-bb51-1c38cb64d84c] Cleaned up temporary file: temp_videos/966cd606-8ad8-4bc5-bb51-1c38cb64d84c.mp4 +2025-08-18 23:24:22 - INFO - [966cd606-8ad8-4bc5-bb51-1c38cb64d84c] Cleaned up temporary frame directory: temp_videos/966cd606-8ad8-4bc5-bb51-1c38cb64d84c +2025-08-18 23:24:22 - INFO - [cc533dcc-5c04-4d7a-b6fc-cd0127815c18] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_047.mp4' +2025-08-18 23:24:22 - INFO - [cc533dcc-5c04-4d7a-b6fc-cd0127815c18] Video saved to temporary file: temp_videos/cc533dcc-5c04-4d7a-b6fc-cd0127815c18.mp4 +2025-08-18 23:24:22 - INFO - [cc533dcc-5c04-4d7a-b6fc-cd0127815c18] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-18 23:24:25 - INFO - [cc533dcc-5c04-4d7a-b6fc-cd0127815c18] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-18 23:24:25 - INFO - [cc533dcc-5c04-4d7a-b6fc-cd0127815c18] 30 frames saved to temp_videos/cc533dcc-5c04-4d7a-b6fc-cd0127815c18 +2025-08-18 23:24:25 - INFO - Prompt token length: 2276 +2025-08-18 23:24:32 - INFO - Tokens per second: 10.1511797285819, Peak GPU memory MB: 4498.375 +2025-08-18 23:24:32 - INFO - [cc533dcc-5c04-4d7a-b6fc-cd0127815c18] Inference time: 9.24 seconds, CPU usage: 46.0%, CPU core utilization: [36.6, 28.2, 86.3, 32.9] +2025-08-18 23:24:32 - INFO - [cc533dcc-5c04-4d7a-b6fc-cd0127815c18] Cleaned up temporary file: temp_videos/cc533dcc-5c04-4d7a-b6fc-cd0127815c18.mp4 +2025-08-18 23:24:32 - INFO - [cc533dcc-5c04-4d7a-b6fc-cd0127815c18] Cleaned up temporary frame directory: temp_videos/cc533dcc-5c04-4d7a-b6fc-cd0127815c18 diff --git a/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250819_010913.log b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250819_010913.log new file mode 100644 index 0000000000000000000000000000000000000000..4019d98d56d6d8b5712fb82d15c3599b53c0631e --- /dev/null +++ b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250819_010913.log @@ -0,0 +1,954 @@ +2025-08-19 01:09:13 - INFO - Loading model: Qwen/Qwen2-VL-2B-Instruct-AWQ +2025-08-19 01:09:16 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-19 01:09:43 - INFO - Model loaded in 30.35 seconds +2025-08-19 01:09:43 - INFO - GPU Memory Usage after model load: 2369.47 MB +2025-08-19 01:10:24 - INFO - [11e28f7b-ae0e-4f19-bb18-ad0a5216da06] Received new video inference request. 
Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-19 01:10:24 - INFO - [11e28f7b-ae0e-4f19-bb18-ad0a5216da06] Video saved to temporary file: temp_videos/11e28f7b-ae0e-4f19-bb18-ad0a5216da06.mp4 +2025-08-19 01:10:24 - INFO - [11e28f7b-ae0e-4f19-bb18-ad0a5216da06] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:10:27 - INFO - [11e28f7b-ae0e-4f19-bb18-ad0a5216da06] Extracted 30 frames successfully. Saving to temporary files... +2025-08-19 01:10:27 - INFO - [11e28f7b-ae0e-4f19-bb18-ad0a5216da06] 30 frames saved to temp_videos/11e28f7b-ae0e-4f19-bb18-ad0a5216da06 +2025-08-19 01:10:27 - INFO - Prompt token length: 2276 +2025-08-19 01:10:36 - INFO - Tokens per second: 14.871808817485372, Peak GPU memory MB: 4498.375 +2025-08-19 01:10:36 - INFO - [11e28f7b-ae0e-4f19-bb18-ad0a5216da06] Inference time: 11.54 seconds, CPU usage: 13.0%, CPU core utilization: [11.0, 11.8, 12.0, 17.2] +2025-08-19 01:10:36 - INFO - [11e28f7b-ae0e-4f19-bb18-ad0a5216da06] Cleaned up temporary file: temp_videos/11e28f7b-ae0e-4f19-bb18-ad0a5216da06.mp4 +2025-08-19 01:10:36 - INFO - [11e28f7b-ae0e-4f19-bb18-ad0a5216da06] Cleaned up temporary frame directory: temp_videos/11e28f7b-ae0e-4f19-bb18-ad0a5216da06 +2025-08-19 01:10:36 - INFO - [143f987d-f1da-4670-9f7a-e1e8acadb666] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_002.mp4' +2025-08-19 01:10:36 - INFO - [143f987d-f1da-4670-9f7a-e1e8acadb666] Video saved to temporary file: temp_videos/143f987d-f1da-4670-9f7a-e1e8acadb666.mp4 +2025-08-19 01:10:36 - INFO - [143f987d-f1da-4670-9f7a-e1e8acadb666] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:10:41 - INFO - [143f987d-f1da-4670-9f7a-e1e8acadb666] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:10:41 - INFO - [143f987d-f1da-4670-9f7a-e1e8acadb666] 30 frames saved to temp_videos/143f987d-f1da-4670-9f7a-e1e8acadb666 +2025-08-19 01:10:42 - INFO - Prompt token length: 2276 +2025-08-19 01:10:49 - INFO - Tokens per second: 15.23367933717406, Peak GPU memory MB: 4498.375 +2025-08-19 01:10:49 - INFO - [143f987d-f1da-4670-9f7a-e1e8acadb666] Inference time: 13.47 seconds, CPU usage: 56.0%, CPU core utilization: [72.9, 43.3, 62.7, 44.9] +2025-08-19 01:10:49 - INFO - [143f987d-f1da-4670-9f7a-e1e8acadb666] Cleaned up temporary file: temp_videos/143f987d-f1da-4670-9f7a-e1e8acadb666.mp4 +2025-08-19 01:10:49 - INFO - [143f987d-f1da-4670-9f7a-e1e8acadb666] Cleaned up temporary frame directory: temp_videos/143f987d-f1da-4670-9f7a-e1e8acadb666 +2025-08-19 01:10:49 - INFO - [68ae3413-63a3-4378-9ae6-d805c60859f0] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_003.mp4' +2025-08-19 01:10:49 - INFO - [68ae3413-63a3-4378-9ae6-d805c60859f0] Video saved to temporary file: temp_videos/68ae3413-63a3-4378-9ae6-d805c60859f0.mp4 +2025-08-19 01:10:49 - INFO - [68ae3413-63a3-4378-9ae6-d805c60859f0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:10:55 - INFO - [68ae3413-63a3-4378-9ae6-d805c60859f0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:10:55 - INFO - [68ae3413-63a3-4378-9ae6-d805c60859f0] 30 frames saved to temp_videos/68ae3413-63a3-4378-9ae6-d805c60859f0 +2025-08-19 01:10:55 - INFO - Prompt token length: 2276 +2025-08-19 01:11:02 - INFO - Tokens per second: 15.081603395523958, Peak GPU memory MB: 4498.375 +2025-08-19 01:11:02 - INFO - [68ae3413-63a3-4378-9ae6-d805c60859f0] Inference time: 12.42 seconds, CPU usage: 56.0%, CPU core utilization: [44.3, 42.4, 43.2, 94.3] +2025-08-19 01:11:02 - INFO - [68ae3413-63a3-4378-9ae6-d805c60859f0] Cleaned up temporary file: temp_videos/68ae3413-63a3-4378-9ae6-d805c60859f0.mp4 +2025-08-19 01:11:02 - INFO - [68ae3413-63a3-4378-9ae6-d805c60859f0] Cleaned up temporary frame directory: temp_videos/68ae3413-63a3-4378-9ae6-d805c60859f0 +2025-08-19 01:11:02 - INFO - [63018021-27be-4e27-8345-a2d3e2498736] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_004.mp4' +2025-08-19 01:11:02 - INFO - [63018021-27be-4e27-8345-a2d3e2498736] Video saved to temporary file: temp_videos/63018021-27be-4e27-8345-a2d3e2498736.mp4 +2025-08-19 01:11:02 - INFO - [63018021-27be-4e27-8345-a2d3e2498736] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:11:07 - INFO - [63018021-27be-4e27-8345-a2d3e2498736] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:11:07 - INFO - [63018021-27be-4e27-8345-a2d3e2498736] 30 frames saved to temp_videos/63018021-27be-4e27-8345-a2d3e2498736 +2025-08-19 01:11:07 - INFO - Prompt token length: 2276 +2025-08-19 01:11:14 - INFO - Tokens per second: 15.126231678377016, Peak GPU memory MB: 4498.375 +2025-08-19 01:11:14 - INFO - [63018021-27be-4e27-8345-a2d3e2498736] Inference time: 12.43 seconds, CPU usage: 55.9%, CPU core utilization: [43.1, 41.6, 44.7, 93.8] +2025-08-19 01:11:14 - INFO - [63018021-27be-4e27-8345-a2d3e2498736] Cleaned up temporary file: temp_videos/63018021-27be-4e27-8345-a2d3e2498736.mp4 +2025-08-19 01:11:14 - INFO - [63018021-27be-4e27-8345-a2d3e2498736] Cleaned up temporary frame directory: temp_videos/63018021-27be-4e27-8345-a2d3e2498736 +2025-08-19 01:11:14 - INFO - [3eeb6519-1942-470c-8ebd-48c7eed5d06e] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_005.mp4' +2025-08-19 01:11:14 - INFO - [3eeb6519-1942-470c-8ebd-48c7eed5d06e] Video saved to temporary file: temp_videos/3eeb6519-1942-470c-8ebd-48c7eed5d06e.mp4 +2025-08-19 01:11:14 - INFO - [3eeb6519-1942-470c-8ebd-48c7eed5d06e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:11:20 - INFO - [3eeb6519-1942-470c-8ebd-48c7eed5d06e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:11:20 - INFO - [3eeb6519-1942-470c-8ebd-48c7eed5d06e] 30 frames saved to temp_videos/3eeb6519-1942-470c-8ebd-48c7eed5d06e +2025-08-19 01:11:20 - INFO - Prompt token length: 2276 +2025-08-19 01:11:26 - INFO - Tokens per second: 15.212259839740025, Peak GPU memory MB: 4498.375 +2025-08-19 01:11:26 - INFO - [3eeb6519-1942-470c-8ebd-48c7eed5d06e] Inference time: 11.86 seconds, CPU usage: 58.6%, CPU core utilization: [66.9, 44.0, 72.7, 50.9] +2025-08-19 01:11:26 - INFO - [3eeb6519-1942-470c-8ebd-48c7eed5d06e] Cleaned up temporary file: temp_videos/3eeb6519-1942-470c-8ebd-48c7eed5d06e.mp4 +2025-08-19 01:11:26 - INFO - [3eeb6519-1942-470c-8ebd-48c7eed5d06e] Cleaned up temporary frame directory: temp_videos/3eeb6519-1942-470c-8ebd-48c7eed5d06e +2025-08-19 01:11:26 - INFO - [47c0a6ca-9907-43fe-b2e5-8aa8a7eda393] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_006.mp4' +2025-08-19 01:11:26 - INFO - [47c0a6ca-9907-43fe-b2e5-8aa8a7eda393] Video saved to temporary file: temp_videos/47c0a6ca-9907-43fe-b2e5-8aa8a7eda393.mp4 +2025-08-19 01:11:26 - INFO - [47c0a6ca-9907-43fe-b2e5-8aa8a7eda393] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:11:32 - INFO - [47c0a6ca-9907-43fe-b2e5-8aa8a7eda393] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:11:32 - INFO - [47c0a6ca-9907-43fe-b2e5-8aa8a7eda393] 30 frames saved to temp_videos/47c0a6ca-9907-43fe-b2e5-8aa8a7eda393 +2025-08-19 01:11:32 - INFO - Prompt token length: 2276 +2025-08-19 01:11:38 - INFO - Tokens per second: 15.212647055565592, Peak GPU memory MB: 4498.375 +2025-08-19 01:11:38 - INFO - [47c0a6ca-9907-43fe-b2e5-8aa8a7eda393] Inference time: 12.11 seconds, CPU usage: 58.5%, CPU core utilization: [60.4, 48.9, 79.1, 45.4] +2025-08-19 01:11:38 - INFO - [47c0a6ca-9907-43fe-b2e5-8aa8a7eda393] Cleaned up temporary file: temp_videos/47c0a6ca-9907-43fe-b2e5-8aa8a7eda393.mp4 +2025-08-19 01:11:38 - INFO - [47c0a6ca-9907-43fe-b2e5-8aa8a7eda393] Cleaned up temporary frame directory: temp_videos/47c0a6ca-9907-43fe-b2e5-8aa8a7eda393 +2025-08-19 01:11:38 - INFO - [3dab996e-60b6-4810-9b72-3c3bbb4269d7] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_007.mp4' +2025-08-19 01:11:38 - INFO - [3dab996e-60b6-4810-9b72-3c3bbb4269d7] Video saved to temporary file: temp_videos/3dab996e-60b6-4810-9b72-3c3bbb4269d7.mp4 +2025-08-19 01:11:38 - INFO - [3dab996e-60b6-4810-9b72-3c3bbb4269d7] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:11:44 - INFO - [3dab996e-60b6-4810-9b72-3c3bbb4269d7] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:11:44 - INFO - [3dab996e-60b6-4810-9b72-3c3bbb4269d7] 30 frames saved to temp_videos/3dab996e-60b6-4810-9b72-3c3bbb4269d7 +2025-08-19 01:11:44 - INFO - Prompt token length: 2276 +2025-08-19 01:11:52 - INFO - Tokens per second: 15.061764161404504, Peak GPU memory MB: 4498.375 +2025-08-19 01:11:52 - INFO - [3dab996e-60b6-4810-9b72-3c3bbb4269d7] Inference time: 13.31 seconds, CPU usage: 56.3%, CPU core utilization: [45.1, 86.2, 43.5, 50.2] +2025-08-19 01:11:52 - INFO - [3dab996e-60b6-4810-9b72-3c3bbb4269d7] Cleaned up temporary file: temp_videos/3dab996e-60b6-4810-9b72-3c3bbb4269d7.mp4 +2025-08-19 01:11:52 - INFO - [3dab996e-60b6-4810-9b72-3c3bbb4269d7] Cleaned up temporary frame directory: temp_videos/3dab996e-60b6-4810-9b72-3c3bbb4269d7 +2025-08-19 01:11:52 - INFO - [fe180ec7-857f-4f74-9bf1-f9df04a022f9] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_008.mp4' +2025-08-19 01:11:52 - INFO - [fe180ec7-857f-4f74-9bf1-f9df04a022f9] Video saved to temporary file: temp_videos/fe180ec7-857f-4f74-9bf1-f9df04a022f9.mp4 +2025-08-19 01:11:52 - INFO - [fe180ec7-857f-4f74-9bf1-f9df04a022f9] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:11:57 - INFO - [fe180ec7-857f-4f74-9bf1-f9df04a022f9] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:11:57 - INFO - [fe180ec7-857f-4f74-9bf1-f9df04a022f9] 30 frames saved to temp_videos/fe180ec7-857f-4f74-9bf1-f9df04a022f9 +2025-08-19 01:11:57 - INFO - Prompt token length: 2276 +2025-08-19 01:12:16 - INFO - Tokens per second: 15.195056179539952, Peak GPU memory MB: 4498.375 +2025-08-19 01:12:16 - INFO - [fe180ec7-857f-4f74-9bf1-f9df04a022f9] Inference time: 24.08 seconds, CPU usage: 42.2%, CPU core utilization: [28.4, 34.8, 22.9, 82.6] +2025-08-19 01:12:16 - INFO - [fe180ec7-857f-4f74-9bf1-f9df04a022f9] Cleaned up temporary file: temp_videos/fe180ec7-857f-4f74-9bf1-f9df04a022f9.mp4 +2025-08-19 01:12:16 - INFO - [fe180ec7-857f-4f74-9bf1-f9df04a022f9] Cleaned up temporary frame directory: temp_videos/fe180ec7-857f-4f74-9bf1-f9df04a022f9 +2025-08-19 01:12:16 - INFO - [cd5ae68b-7492-4a27-9413-da254e9c8d05] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_009.mp4' +2025-08-19 01:12:16 - INFO - [cd5ae68b-7492-4a27-9413-da254e9c8d05] Video saved to temporary file: temp_videos/cd5ae68b-7492-4a27-9413-da254e9c8d05.mp4 +2025-08-19 01:12:16 - INFO - [cd5ae68b-7492-4a27-9413-da254e9c8d05] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:12:20 - INFO - [cd5ae68b-7492-4a27-9413-da254e9c8d05] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:12:20 - INFO - [cd5ae68b-7492-4a27-9413-da254e9c8d05] 30 frames saved to temp_videos/cd5ae68b-7492-4a27-9413-da254e9c8d05 +2025-08-19 01:12:20 - INFO - Prompt token length: 2276 +2025-08-19 01:12:29 - INFO - Tokens per second: 15.094158601086663, Peak GPU memory MB: 4498.375 +2025-08-19 01:12:29 - INFO - [cd5ae68b-7492-4a27-9413-da254e9c8d05] Inference time: 13.62 seconds, CPU usage: 50.8%, CPU core utilization: [35.3, 62.5, 39.4, 66.2] +2025-08-19 01:12:29 - INFO - [cd5ae68b-7492-4a27-9413-da254e9c8d05] Cleaned up temporary file: temp_videos/cd5ae68b-7492-4a27-9413-da254e9c8d05.mp4 +2025-08-19 01:12:29 - INFO - [cd5ae68b-7492-4a27-9413-da254e9c8d05] Cleaned up temporary frame directory: temp_videos/cd5ae68b-7492-4a27-9413-da254e9c8d05 +2025-08-19 01:12:30 - INFO - [d4f6d45e-adf2-4488-936e-fb56b66ed4c3] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_010.mp4' +2025-08-19 01:12:30 - INFO - [d4f6d45e-adf2-4488-936e-fb56b66ed4c3] Video saved to temporary file: temp_videos/d4f6d45e-adf2-4488-936e-fb56b66ed4c3.mp4 +2025-08-19 01:12:30 - INFO - [d4f6d45e-adf2-4488-936e-fb56b66ed4c3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:12:35 - INFO - [d4f6d45e-adf2-4488-936e-fb56b66ed4c3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:12:35 - INFO - [d4f6d45e-adf2-4488-936e-fb56b66ed4c3] 30 frames saved to temp_videos/d4f6d45e-adf2-4488-936e-fb56b66ed4c3 +2025-08-19 01:12:35 - INFO - Prompt token length: 2276 +2025-08-19 01:12:43 - INFO - Tokens per second: 15.216574058636295, Peak GPU memory MB: 4498.375 +2025-08-19 01:12:43 - INFO - [d4f6d45e-adf2-4488-936e-fb56b66ed4c3] Inference time: 13.19 seconds, CPU usage: 54.9%, CPU core utilization: [52.7, 40.1, 82.7, 44.1] +2025-08-19 01:12:43 - INFO - [d4f6d45e-adf2-4488-936e-fb56b66ed4c3] Cleaned up temporary file: temp_videos/d4f6d45e-adf2-4488-936e-fb56b66ed4c3.mp4 +2025-08-19 01:12:43 - INFO - [d4f6d45e-adf2-4488-936e-fb56b66ed4c3] Cleaned up temporary frame directory: temp_videos/d4f6d45e-adf2-4488-936e-fb56b66ed4c3 +2025-08-19 01:12:43 - INFO - [abbde959-bba1-4bb7-963f-32aa383916f1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_011.mp4' +2025-08-19 01:12:43 - INFO - [abbde959-bba1-4bb7-963f-32aa383916f1] Video saved to temporary file: temp_videos/abbde959-bba1-4bb7-963f-32aa383916f1.mp4 +2025-08-19 01:12:43 - INFO - [abbde959-bba1-4bb7-963f-32aa383916f1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:12:48 - INFO - [abbde959-bba1-4bb7-963f-32aa383916f1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:12:48 - INFO - [abbde959-bba1-4bb7-963f-32aa383916f1] 30 frames saved to temp_videos/abbde959-bba1-4bb7-963f-32aa383916f1 +2025-08-19 01:12:48 - INFO - Prompt token length: 2276 +2025-08-19 01:12:58 - INFO - Tokens per second: 15.021552487173862, Peak GPU memory MB: 4498.375 +2025-08-19 01:12:58 - INFO - [abbde959-bba1-4bb7-963f-32aa383916f1] Inference time: 15.39 seconds, CPU usage: 52.7%, CPU core utilization: [36.5, 55.7, 42.3, 76.3] +2025-08-19 01:12:58 - INFO - [abbde959-bba1-4bb7-963f-32aa383916f1] Cleaned up temporary file: temp_videos/abbde959-bba1-4bb7-963f-32aa383916f1.mp4 +2025-08-19 01:12:58 - INFO - [abbde959-bba1-4bb7-963f-32aa383916f1] Cleaned up temporary frame directory: temp_videos/abbde959-bba1-4bb7-963f-32aa383916f1 +2025-08-19 01:12:58 - INFO - [caa14a57-17d1-44e4-999d-31592969f55d] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_012.mp4' +2025-08-19 01:12:58 - INFO - [caa14a57-17d1-44e4-999d-31592969f55d] Video saved to temporary file: temp_videos/caa14a57-17d1-44e4-999d-31592969f55d.mp4 +2025-08-19 01:12:58 - INFO - [caa14a57-17d1-44e4-999d-31592969f55d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:13:04 - INFO - [caa14a57-17d1-44e4-999d-31592969f55d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:13:04 - INFO - [caa14a57-17d1-44e4-999d-31592969f55d] 30 frames saved to temp_videos/caa14a57-17d1-44e4-999d-31592969f55d +2025-08-19 01:13:04 - INFO - Prompt token length: 2276 +2025-08-19 01:13:10 - INFO - Tokens per second: 15.13753955334378, Peak GPU memory MB: 4498.375 +2025-08-19 01:13:10 - INFO - [caa14a57-17d1-44e4-999d-31592969f55d] Inference time: 11.84 seconds, CPU usage: 60.9%, CPU core utilization: [55.8, 92.6, 47.5, 47.6] +2025-08-19 01:13:10 - INFO - [caa14a57-17d1-44e4-999d-31592969f55d] Cleaned up temporary file: temp_videos/caa14a57-17d1-44e4-999d-31592969f55d.mp4 +2025-08-19 01:13:10 - INFO - [caa14a57-17d1-44e4-999d-31592969f55d] Cleaned up temporary frame directory: temp_videos/caa14a57-17d1-44e4-999d-31592969f55d +2025-08-19 01:13:10 - INFO - [cf45ea1f-afb3-4fb0-b720-ecb1748d5f72] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_013.mp4' +2025-08-19 01:13:10 - INFO - [cf45ea1f-afb3-4fb0-b720-ecb1748d5f72] Video saved to temporary file: temp_videos/cf45ea1f-afb3-4fb0-b720-ecb1748d5f72.mp4 +2025-08-19 01:13:10 - INFO - [cf45ea1f-afb3-4fb0-b720-ecb1748d5f72] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:13:15 - INFO - [cf45ea1f-afb3-4fb0-b720-ecb1748d5f72] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:13:15 - INFO - [cf45ea1f-afb3-4fb0-b720-ecb1748d5f72] 30 frames saved to temp_videos/cf45ea1f-afb3-4fb0-b720-ecb1748d5f72 +2025-08-19 01:13:16 - INFO - Prompt token length: 2276 +2025-08-19 01:13:22 - INFO - Tokens per second: 15.243910636194112, Peak GPU memory MB: 4498.375 +2025-08-19 01:13:22 - INFO - [cf45ea1f-afb3-4fb0-b720-ecb1748d5f72] Inference time: 11.89 seconds, CPU usage: 58.9%, CPU core utilization: [52.1, 48.5, 87.9, 47.1] +2025-08-19 01:13:22 - INFO - [cf45ea1f-afb3-4fb0-b720-ecb1748d5f72] Cleaned up temporary file: temp_videos/cf45ea1f-afb3-4fb0-b720-ecb1748d5f72.mp4 +2025-08-19 01:13:22 - INFO - [cf45ea1f-afb3-4fb0-b720-ecb1748d5f72] Cleaned up temporary frame directory: temp_videos/cf45ea1f-afb3-4fb0-b720-ecb1748d5f72 +2025-08-19 01:13:22 - INFO - [f305efde-1428-4d87-bc8d-8ccff90b5ce8] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_014.mp4' +2025-08-19 01:13:22 - INFO - [f305efde-1428-4d87-bc8d-8ccff90b5ce8] Video saved to temporary file: temp_videos/f305efde-1428-4d87-bc8d-8ccff90b5ce8.mp4 +2025-08-19 01:13:22 - INFO - [f305efde-1428-4d87-bc8d-8ccff90b5ce8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:13:27 - INFO - [f305efde-1428-4d87-bc8d-8ccff90b5ce8] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:13:27 - INFO - [f305efde-1428-4d87-bc8d-8ccff90b5ce8] 30 frames saved to temp_videos/f305efde-1428-4d87-bc8d-8ccff90b5ce8 +2025-08-19 01:13:27 - INFO - Prompt token length: 2276 +2025-08-19 01:13:34 - INFO - Tokens per second: 15.177171379939264, Peak GPU memory MB: 4498.375 +2025-08-19 01:13:34 - INFO - [f305efde-1428-4d87-bc8d-8ccff90b5ce8] Inference time: 12.16 seconds, CPU usage: 54.4%, CPU core utilization: [50.8, 70.9, 43.4, 52.4] +2025-08-19 01:13:34 - INFO - [f305efde-1428-4d87-bc8d-8ccff90b5ce8] Cleaned up temporary file: temp_videos/f305efde-1428-4d87-bc8d-8ccff90b5ce8.mp4 +2025-08-19 01:13:34 - INFO - [f305efde-1428-4d87-bc8d-8ccff90b5ce8] Cleaned up temporary frame directory: temp_videos/f305efde-1428-4d87-bc8d-8ccff90b5ce8 +2025-08-19 01:13:34 - INFO - [db674a81-2a5a-4664-b294-479f1030c672] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_015.mp4' +2025-08-19 01:13:34 - INFO - [db674a81-2a5a-4664-b294-479f1030c672] Video saved to temporary file: temp_videos/db674a81-2a5a-4664-b294-479f1030c672.mp4 +2025-08-19 01:13:34 - INFO - [db674a81-2a5a-4664-b294-479f1030c672] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:13:40 - INFO - [db674a81-2a5a-4664-b294-479f1030c672] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:13:40 - INFO - [db674a81-2a5a-4664-b294-479f1030c672] 30 frames saved to temp_videos/db674a81-2a5a-4664-b294-479f1030c672 +2025-08-19 01:13:40 - INFO - Prompt token length: 2276 +2025-08-19 01:13:49 - INFO - Tokens per second: 15.15960774327166, Peak GPU memory MB: 4498.375 +2025-08-19 01:13:49 - INFO - [db674a81-2a5a-4664-b294-479f1030c672] Inference time: 15.14 seconds, CPU usage: 52.8%, CPU core utilization: [43.8, 40.6, 87.5, 39.2] +2025-08-19 01:13:49 - INFO - [db674a81-2a5a-4664-b294-479f1030c672] Cleaned up temporary file: temp_videos/db674a81-2a5a-4664-b294-479f1030c672.mp4 +2025-08-19 01:13:49 - INFO - [db674a81-2a5a-4664-b294-479f1030c672] Cleaned up temporary frame directory: temp_videos/db674a81-2a5a-4664-b294-479f1030c672 +2025-08-19 01:13:49 - INFO - [1c5ef25a-51df-41ab-a3dd-714bec51e1e0] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_016.mp4' +2025-08-19 01:13:49 - INFO - [1c5ef25a-51df-41ab-a3dd-714bec51e1e0] Video saved to temporary file: temp_videos/1c5ef25a-51df-41ab-a3dd-714bec51e1e0.mp4 +2025-08-19 01:13:49 - INFO - [1c5ef25a-51df-41ab-a3dd-714bec51e1e0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:13:54 - INFO - [1c5ef25a-51df-41ab-a3dd-714bec51e1e0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:13:54 - INFO - [1c5ef25a-51df-41ab-a3dd-714bec51e1e0] 30 frames saved to temp_videos/1c5ef25a-51df-41ab-a3dd-714bec51e1e0 +2025-08-19 01:13:54 - INFO - Prompt token length: 2276 +2025-08-19 01:14:12 - INFO - Tokens per second: 15.159210675949232, Peak GPU memory MB: 4498.375 +2025-08-19 01:14:12 - INFO - [1c5ef25a-51df-41ab-a3dd-714bec51e1e0] Inference time: 23.05 seconds, CPU usage: 41.5%, CPU core utilization: [23.6, 19.5, 24.5, 98.2] +2025-08-19 01:14:12 - INFO - [1c5ef25a-51df-41ab-a3dd-714bec51e1e0] Cleaned up temporary file: temp_videos/1c5ef25a-51df-41ab-a3dd-714bec51e1e0.mp4 +2025-08-19 01:14:12 - INFO - [1c5ef25a-51df-41ab-a3dd-714bec51e1e0] Cleaned up temporary frame directory: temp_videos/1c5ef25a-51df-41ab-a3dd-714bec51e1e0 +2025-08-19 01:14:12 - INFO - [6420208d-ce30-4772-8d26-78fc113bd77b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_017.mp4' +2025-08-19 01:14:12 - INFO - [6420208d-ce30-4772-8d26-78fc113bd77b] Video saved to temporary file: temp_videos/6420208d-ce30-4772-8d26-78fc113bd77b.mp4 +2025-08-19 01:14:12 - INFO - [6420208d-ce30-4772-8d26-78fc113bd77b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:14:18 - INFO - [6420208d-ce30-4772-8d26-78fc113bd77b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:14:18 - INFO - [6420208d-ce30-4772-8d26-78fc113bd77b] 30 frames saved to temp_videos/6420208d-ce30-4772-8d26-78fc113bd77b +2025-08-19 01:14:18 - INFO - Prompt token length: 2276 +2025-08-19 01:14:25 - INFO - Tokens per second: 15.273026527404369, Peak GPU memory MB: 4498.375 +2025-08-19 01:14:25 - INFO - [6420208d-ce30-4772-8d26-78fc113bd77b] Inference time: 12.70 seconds, CPU usage: 58.6%, CPU core utilization: [59.4, 46.5, 80.3, 48.5] +2025-08-19 01:14:25 - INFO - [6420208d-ce30-4772-8d26-78fc113bd77b] Cleaned up temporary file: temp_videos/6420208d-ce30-4772-8d26-78fc113bd77b.mp4 +2025-08-19 01:14:25 - INFO - [6420208d-ce30-4772-8d26-78fc113bd77b] Cleaned up temporary frame directory: temp_videos/6420208d-ce30-4772-8d26-78fc113bd77b +2025-08-19 01:14:25 - INFO - [39cf485a-af07-42cc-bfa7-898b4166a00e] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_018.mp4' +2025-08-19 01:14:25 - INFO - [39cf485a-af07-42cc-bfa7-898b4166a00e] Video saved to temporary file: temp_videos/39cf485a-af07-42cc-bfa7-898b4166a00e.mp4 +2025-08-19 01:14:25 - INFO - [39cf485a-af07-42cc-bfa7-898b4166a00e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:14:30 - INFO - [39cf485a-af07-42cc-bfa7-898b4166a00e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:14:30 - INFO - [39cf485a-af07-42cc-bfa7-898b4166a00e] 30 frames saved to temp_videos/39cf485a-af07-42cc-bfa7-898b4166a00e +2025-08-19 01:14:30 - INFO - Prompt token length: 2276 +2025-08-19 01:14:49 - INFO - Tokens per second: 15.166782184633584, Peak GPU memory MB: 4498.375 +2025-08-19 01:14:49 - INFO - [39cf485a-af07-42cc-bfa7-898b4166a00e] Inference time: 23.88 seconds, CPU usage: 42.8%, CPU core utilization: [50.4, 48.1, 27.7, 45.2] +2025-08-19 01:14:49 - INFO - [39cf485a-af07-42cc-bfa7-898b4166a00e] Cleaned up temporary file: temp_videos/39cf485a-af07-42cc-bfa7-898b4166a00e.mp4 +2025-08-19 01:14:49 - INFO - [39cf485a-af07-42cc-bfa7-898b4166a00e] Cleaned up temporary frame directory: temp_videos/39cf485a-af07-42cc-bfa7-898b4166a00e +2025-08-19 01:14:49 - INFO - [32fe6878-5348-4d33-bff2-cb674d7be675] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_019.mp4' +2025-08-19 01:14:49 - INFO - [32fe6878-5348-4d33-bff2-cb674d7be675] Video saved to temporary file: temp_videos/32fe6878-5348-4d33-bff2-cb674d7be675.mp4 +2025-08-19 01:14:49 - INFO - [32fe6878-5348-4d33-bff2-cb674d7be675] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:14:55 - INFO - [32fe6878-5348-4d33-bff2-cb674d7be675] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:14:55 - INFO - [32fe6878-5348-4d33-bff2-cb674d7be675] 30 frames saved to temp_videos/32fe6878-5348-4d33-bff2-cb674d7be675 +2025-08-19 01:14:55 - INFO - Prompt token length: 2276 +2025-08-19 01:15:04 - INFO - Tokens per second: 15.058410769565253, Peak GPU memory MB: 4498.375 +2025-08-19 01:15:04 - INFO - [32fe6878-5348-4d33-bff2-cb674d7be675] Inference time: 14.90 seconds, CPU usage: 53.7%, CPU core utilization: [44.7, 64.2, 38.5, 67.6] +2025-08-19 01:15:04 - INFO - [32fe6878-5348-4d33-bff2-cb674d7be675] Cleaned up temporary file: temp_videos/32fe6878-5348-4d33-bff2-cb674d7be675.mp4 +2025-08-19 01:15:04 - INFO - [32fe6878-5348-4d33-bff2-cb674d7be675] Cleaned up temporary frame directory: temp_videos/32fe6878-5348-4d33-bff2-cb674d7be675 +2025-08-19 01:15:04 - INFO - [2823e7a8-0fa5-4808-83d6-fe91adddc992] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_020.mp4' +2025-08-19 01:15:04 - INFO - [2823e7a8-0fa5-4808-83d6-fe91adddc992] Video saved to temporary file: temp_videos/2823e7a8-0fa5-4808-83d6-fe91adddc992.mp4 +2025-08-19 01:15:04 - INFO - [2823e7a8-0fa5-4808-83d6-fe91adddc992] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:15:09 - INFO - [2823e7a8-0fa5-4808-83d6-fe91adddc992] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:15:09 - INFO - [2823e7a8-0fa5-4808-83d6-fe91adddc992] 30 frames saved to temp_videos/2823e7a8-0fa5-4808-83d6-fe91adddc992 +2025-08-19 01:15:09 - INFO - Prompt token length: 2276 +2025-08-19 01:15:17 - INFO - Tokens per second: 15.1965029864204, Peak GPU memory MB: 4498.375 +2025-08-19 01:15:17 - INFO - [2823e7a8-0fa5-4808-83d6-fe91adddc992] Inference time: 13.39 seconds, CPU usage: 53.4%, CPU core utilization: [70.7, 41.8, 61.9, 39.3] +2025-08-19 01:15:17 - INFO - [2823e7a8-0fa5-4808-83d6-fe91adddc992] Cleaned up temporary file: temp_videos/2823e7a8-0fa5-4808-83d6-fe91adddc992.mp4 +2025-08-19 01:15:17 - INFO - [2823e7a8-0fa5-4808-83d6-fe91adddc992] Cleaned up temporary frame directory: temp_videos/2823e7a8-0fa5-4808-83d6-fe91adddc992 +2025-08-19 01:15:17 - INFO - [9c71f776-8d12-4c41-b39b-9991d3c9c577] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_021.mp4' +2025-08-19 01:15:17 - INFO - [9c71f776-8d12-4c41-b39b-9991d3c9c577] Video saved to temporary file: temp_videos/9c71f776-8d12-4c41-b39b-9991d3c9c577.mp4 +2025-08-19 01:15:17 - INFO - [9c71f776-8d12-4c41-b39b-9991d3c9c577] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:15:23 - INFO - [9c71f776-8d12-4c41-b39b-9991d3c9c577] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:15:23 - INFO - [9c71f776-8d12-4c41-b39b-9991d3c9c577] 30 frames saved to temp_videos/9c71f776-8d12-4c41-b39b-9991d3c9c577 +2025-08-19 01:15:23 - INFO - Prompt token length: 2276 +2025-08-19 01:15:31 - INFO - Tokens per second: 15.19014004168475, Peak GPU memory MB: 4498.375 +2025-08-19 01:15:31 - INFO - [9c71f776-8d12-4c41-b39b-9991d3c9c577] Inference time: 13.89 seconds, CPU usage: 53.9%, CPU core utilization: [89.7, 39.6, 43.8, 42.3] +2025-08-19 01:15:31 - INFO - [9c71f776-8d12-4c41-b39b-9991d3c9c577] Cleaned up temporary file: temp_videos/9c71f776-8d12-4c41-b39b-9991d3c9c577.mp4 +2025-08-19 01:15:31 - INFO - [9c71f776-8d12-4c41-b39b-9991d3c9c577] Cleaned up temporary frame directory: temp_videos/9c71f776-8d12-4c41-b39b-9991d3c9c577 +2025-08-19 01:15:31 - INFO - [d01ef2b3-dc62-4072-b7dd-f81f35995e49] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_022.mp4' +2025-08-19 01:15:31 - INFO - [d01ef2b3-dc62-4072-b7dd-f81f35995e49] Video saved to temporary file: temp_videos/d01ef2b3-dc62-4072-b7dd-f81f35995e49.mp4 +2025-08-19 01:15:31 - INFO - [d01ef2b3-dc62-4072-b7dd-f81f35995e49] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:15:37 - INFO - [d01ef2b3-dc62-4072-b7dd-f81f35995e49] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:15:37 - INFO - [d01ef2b3-dc62-4072-b7dd-f81f35995e49] 30 frames saved to temp_videos/d01ef2b3-dc62-4072-b7dd-f81f35995e49 +2025-08-19 01:15:37 - INFO - Prompt token length: 2276 +2025-08-19 01:15:45 - INFO - Tokens per second: 15.279043428402563, Peak GPU memory MB: 4498.375 +2025-08-19 01:15:45 - INFO - [d01ef2b3-dc62-4072-b7dd-f81f35995e49] Inference time: 13.58 seconds, CPU usage: 53.4%, CPU core utilization: [43.0, 38.4, 88.7, 43.3] +2025-08-19 01:15:45 - INFO - [d01ef2b3-dc62-4072-b7dd-f81f35995e49] Cleaned up temporary file: temp_videos/d01ef2b3-dc62-4072-b7dd-f81f35995e49.mp4 +2025-08-19 01:15:45 - INFO - [d01ef2b3-dc62-4072-b7dd-f81f35995e49] Cleaned up temporary frame directory: temp_videos/d01ef2b3-dc62-4072-b7dd-f81f35995e49 +2025-08-19 01:15:45 - INFO - [dea9fb4b-d3dc-4c78-a6e7-a373d7a61a95] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_023.mp4' +2025-08-19 01:15:45 - INFO - [dea9fb4b-d3dc-4c78-a6e7-a373d7a61a95] Video saved to temporary file: temp_videos/dea9fb4b-d3dc-4c78-a6e7-a373d7a61a95.mp4 +2025-08-19 01:15:45 - INFO - [dea9fb4b-d3dc-4c78-a6e7-a373d7a61a95] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:15:51 - INFO - [dea9fb4b-d3dc-4c78-a6e7-a373d7a61a95] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:15:51 - INFO - [dea9fb4b-d3dc-4c78-a6e7-a373d7a61a95] 30 frames saved to temp_videos/dea9fb4b-d3dc-4c78-a6e7-a373d7a61a95 +2025-08-19 01:15:51 - INFO - Prompt token length: 2276 +2025-08-19 01:15:58 - INFO - Tokens per second: 15.05226145720559, Peak GPU memory MB: 4498.375 +2025-08-19 01:15:58 - INFO - [dea9fb4b-d3dc-4c78-a6e7-a373d7a61a95] Inference time: 12.88 seconds, CPU usage: 56.9%, CPU core utilization: [51.3, 84.1, 43.8, 48.3] +2025-08-19 01:15:58 - INFO - [dea9fb4b-d3dc-4c78-a6e7-a373d7a61a95] Cleaned up temporary file: temp_videos/dea9fb4b-d3dc-4c78-a6e7-a373d7a61a95.mp4 +2025-08-19 01:15:58 - INFO - [dea9fb4b-d3dc-4c78-a6e7-a373d7a61a95] Cleaned up temporary frame directory: temp_videos/dea9fb4b-d3dc-4c78-a6e7-a373d7a61a95 +2025-08-19 01:15:58 - INFO - [591b687c-c5c2-42cc-8d12-68f43f817205] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_024.mp4' +2025-08-19 01:15:58 - INFO - [591b687c-c5c2-42cc-8d12-68f43f817205] Video saved to temporary file: temp_videos/591b687c-c5c2-42cc-8d12-68f43f817205.mp4 +2025-08-19 01:15:58 - INFO - [591b687c-c5c2-42cc-8d12-68f43f817205] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:16:03 - INFO - [591b687c-c5c2-42cc-8d12-68f43f817205] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:16:03 - INFO - [591b687c-c5c2-42cc-8d12-68f43f817205] 30 frames saved to temp_videos/591b687c-c5c2-42cc-8d12-68f43f817205 +2025-08-19 01:16:03 - INFO - Prompt token length: 2276 +2025-08-19 01:16:22 - INFO - Tokens per second: 15.240152338245363, Peak GPU memory MB: 4498.375 +2025-08-19 01:16:22 - INFO - [591b687c-c5c2-42cc-8d12-68f43f817205] Inference time: 23.85 seconds, CPU usage: 43.2%, CPU core utilization: [26.8, 33.4, 27.3, 85.1] +2025-08-19 01:16:22 - INFO - [591b687c-c5c2-42cc-8d12-68f43f817205] Cleaned up temporary file: temp_videos/591b687c-c5c2-42cc-8d12-68f43f817205.mp4 +2025-08-19 01:16:22 - INFO - [591b687c-c5c2-42cc-8d12-68f43f817205] Cleaned up temporary frame directory: temp_videos/591b687c-c5c2-42cc-8d12-68f43f817205 +2025-08-19 01:16:22 - INFO - [0913cb77-318c-41a2-90c6-b0cd4cf2f5c3] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_025.mp4' +2025-08-19 01:16:22 - INFO - [0913cb77-318c-41a2-90c6-b0cd4cf2f5c3] Video saved to temporary file: temp_videos/0913cb77-318c-41a2-90c6-b0cd4cf2f5c3.mp4 +2025-08-19 01:16:22 - INFO - [0913cb77-318c-41a2-90c6-b0cd4cf2f5c3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:16:27 - INFO - [0913cb77-318c-41a2-90c6-b0cd4cf2f5c3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:16:27 - INFO - [0913cb77-318c-41a2-90c6-b0cd4cf2f5c3] 30 frames saved to temp_videos/0913cb77-318c-41a2-90c6-b0cd4cf2f5c3 +2025-08-19 01:16:28 - INFO - Prompt token length: 2276 +2025-08-19 01:16:35 - INFO - Tokens per second: 15.13505504510679, Peak GPU memory MB: 4498.375 +2025-08-19 01:16:35 - INFO - [0913cb77-318c-41a2-90c6-b0cd4cf2f5c3] Inference time: 13.33 seconds, CPU usage: 56.3%, CPU core utilization: [41.0, 47.6, 47.8, 88.9] +2025-08-19 01:16:35 - INFO - [0913cb77-318c-41a2-90c6-b0cd4cf2f5c3] Cleaned up temporary file: temp_videos/0913cb77-318c-41a2-90c6-b0cd4cf2f5c3.mp4 +2025-08-19 01:16:35 - INFO - [0913cb77-318c-41a2-90c6-b0cd4cf2f5c3] Cleaned up temporary frame directory: temp_videos/0913cb77-318c-41a2-90c6-b0cd4cf2f5c3 +2025-08-19 01:16:35 - INFO - [524418f7-97ec-403a-a5e3-d70bd41abf16] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_026.mp4' +2025-08-19 01:16:35 - INFO - [524418f7-97ec-403a-a5e3-d70bd41abf16] Video saved to temporary file: temp_videos/524418f7-97ec-403a-a5e3-d70bd41abf16.mp4 +2025-08-19 01:16:35 - INFO - [524418f7-97ec-403a-a5e3-d70bd41abf16] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:16:40 - INFO - [524418f7-97ec-403a-a5e3-d70bd41abf16] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:16:41 - INFO - [524418f7-97ec-403a-a5e3-d70bd41abf16] 30 frames saved to temp_videos/524418f7-97ec-403a-a5e3-d70bd41abf16 +2025-08-19 01:16:41 - INFO - Prompt token length: 2276 +2025-08-19 01:16:48 - INFO - Tokens per second: 15.050975248385665, Peak GPU memory MB: 4498.375 +2025-08-19 01:16:48 - INFO - [524418f7-97ec-403a-a5e3-d70bd41abf16] Inference time: 12.46 seconds, CPU usage: 57.0%, CPU core utilization: [50.2, 44.2, 78.1, 55.7] +2025-08-19 01:16:48 - INFO - [524418f7-97ec-403a-a5e3-d70bd41abf16] Cleaned up temporary file: temp_videos/524418f7-97ec-403a-a5e3-d70bd41abf16.mp4 +2025-08-19 01:16:48 - INFO - [524418f7-97ec-403a-a5e3-d70bd41abf16] Cleaned up temporary frame directory: temp_videos/524418f7-97ec-403a-a5e3-d70bd41abf16 +2025-08-19 01:16:48 - INFO - [7af4f501-c03c-405d-88d3-3bf049c9e6f1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_027.mp4' +2025-08-19 01:16:48 - INFO - [7af4f501-c03c-405d-88d3-3bf049c9e6f1] Video saved to temporary file: temp_videos/7af4f501-c03c-405d-88d3-3bf049c9e6f1.mp4 +2025-08-19 01:16:48 - INFO - [7af4f501-c03c-405d-88d3-3bf049c9e6f1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:16:53 - INFO - [7af4f501-c03c-405d-88d3-3bf049c9e6f1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:16:53 - INFO - [7af4f501-c03c-405d-88d3-3bf049c9e6f1] 30 frames saved to temp_videos/7af4f501-c03c-405d-88d3-3bf049c9e6f1 +2025-08-19 01:16:53 - INFO - Prompt token length: 2276 +2025-08-19 01:17:00 - INFO - Tokens per second: 15.113742071308003, Peak GPU memory MB: 4498.375 +2025-08-19 01:17:00 - INFO - [7af4f501-c03c-405d-88d3-3bf049c9e6f1] Inference time: 12.31 seconds, CPU usage: 54.2%, CPU core utilization: [39.5, 69.8, 67.7, 39.8] +2025-08-19 01:17:00 - INFO - [7af4f501-c03c-405d-88d3-3bf049c9e6f1] Cleaned up temporary file: temp_videos/7af4f501-c03c-405d-88d3-3bf049c9e6f1.mp4 +2025-08-19 01:17:00 - INFO - [7af4f501-c03c-405d-88d3-3bf049c9e6f1] Cleaned up temporary frame directory: temp_videos/7af4f501-c03c-405d-88d3-3bf049c9e6f1 +2025-08-19 01:17:00 - INFO - [7d76e78a-a5e7-4f59-a023-61e602e04957] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_028.mp4' +2025-08-19 01:17:00 - INFO - [7d76e78a-a5e7-4f59-a023-61e602e04957] Video saved to temporary file: temp_videos/7d76e78a-a5e7-4f59-a023-61e602e04957.mp4 +2025-08-19 01:17:00 - INFO - [7d76e78a-a5e7-4f59-a023-61e602e04957] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:17:06 - INFO - [7d76e78a-a5e7-4f59-a023-61e602e04957] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:17:06 - INFO - [7d76e78a-a5e7-4f59-a023-61e602e04957] 30 frames saved to temp_videos/7d76e78a-a5e7-4f59-a023-61e602e04957 +2025-08-19 01:17:06 - INFO - Prompt token length: 2276 +2025-08-19 01:17:24 - INFO - Tokens per second: 15.184081740207581, Peak GPU memory MB: 4498.375 +2025-08-19 01:17:24 - INFO - [7d76e78a-a5e7-4f59-a023-61e602e04957] Inference time: 24.24 seconds, CPU usage: 44.3%, CPU core utilization: [53.7, 38.7, 44.8, 40.1] +2025-08-19 01:17:24 - INFO - [7d76e78a-a5e7-4f59-a023-61e602e04957] Cleaned up temporary file: temp_videos/7d76e78a-a5e7-4f59-a023-61e602e04957.mp4 +2025-08-19 01:17:24 - INFO - [7d76e78a-a5e7-4f59-a023-61e602e04957] Cleaned up temporary frame directory: temp_videos/7d76e78a-a5e7-4f59-a023-61e602e04957 +2025-08-19 01:17:24 - INFO - [f6fcad61-7b1c-428e-a88a-eede71857dfc] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_029.mp4' +2025-08-19 01:17:24 - INFO - [f6fcad61-7b1c-428e-a88a-eede71857dfc] Video saved to temporary file: temp_videos/f6fcad61-7b1c-428e-a88a-eede71857dfc.mp4 +2025-08-19 01:17:24 - INFO - [f6fcad61-7b1c-428e-a88a-eede71857dfc] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:17:30 - INFO - [f6fcad61-7b1c-428e-a88a-eede71857dfc] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:17:30 - INFO - [f6fcad61-7b1c-428e-a88a-eede71857dfc] 30 frames saved to temp_videos/f6fcad61-7b1c-428e-a88a-eede71857dfc +2025-08-19 01:17:30 - INFO - Prompt token length: 2276 +2025-08-19 01:17:39 - INFO - Tokens per second: 15.08619472557226, Peak GPU memory MB: 4498.375 +2025-08-19 01:17:39 - INFO - [f6fcad61-7b1c-428e-a88a-eede71857dfc] Inference time: 14.88 seconds, CPU usage: 53.2%, CPU core utilization: [60.4, 54.7, 52.5, 45.4] +2025-08-19 01:17:39 - INFO - [f6fcad61-7b1c-428e-a88a-eede71857dfc] Cleaned up temporary file: temp_videos/f6fcad61-7b1c-428e-a88a-eede71857dfc.mp4 +2025-08-19 01:17:39 - INFO - [f6fcad61-7b1c-428e-a88a-eede71857dfc] Cleaned up temporary frame directory: temp_videos/f6fcad61-7b1c-428e-a88a-eede71857dfc +2025-08-19 01:17:39 - INFO - [2e667a4f-c072-4086-a16d-7adfe1c1664a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_030.mp4' +2025-08-19 01:17:39 - INFO - [2e667a4f-c072-4086-a16d-7adfe1c1664a] Video saved to temporary file: temp_videos/2e667a4f-c072-4086-a16d-7adfe1c1664a.mp4 +2025-08-19 01:17:39 - INFO - [2e667a4f-c072-4086-a16d-7adfe1c1664a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:17:45 - INFO - [2e667a4f-c072-4086-a16d-7adfe1c1664a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:17:45 - INFO - [2e667a4f-c072-4086-a16d-7adfe1c1664a] 30 frames saved to temp_videos/2e667a4f-c072-4086-a16d-7adfe1c1664a +2025-08-19 01:17:45 - INFO - Prompt token length: 2276 +2025-08-19 01:17:52 - INFO - Tokens per second: 15.171960313899701, Peak GPU memory MB: 4498.375 +2025-08-19 01:17:52 - INFO - [2e667a4f-c072-4086-a16d-7adfe1c1664a] Inference time: 12.88 seconds, CPU usage: 59.9%, CPU core utilization: [81.1, 56.0, 54.9, 47.7] +2025-08-19 01:17:52 - INFO - [2e667a4f-c072-4086-a16d-7adfe1c1664a] Cleaned up temporary file: temp_videos/2e667a4f-c072-4086-a16d-7adfe1c1664a.mp4 +2025-08-19 01:17:52 - INFO - [2e667a4f-c072-4086-a16d-7adfe1c1664a] Cleaned up temporary frame directory: temp_videos/2e667a4f-c072-4086-a16d-7adfe1c1664a +2025-08-19 01:17:52 - INFO - [705229ca-6883-4f4e-be12-4d8ce49c82b5] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_031.mp4' +2025-08-19 01:17:52 - INFO - [705229ca-6883-4f4e-be12-4d8ce49c82b5] Video saved to temporary file: temp_videos/705229ca-6883-4f4e-be12-4d8ce49c82b5.mp4 +2025-08-19 01:17:52 - INFO - [705229ca-6883-4f4e-be12-4d8ce49c82b5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:17:58 - INFO - [705229ca-6883-4f4e-be12-4d8ce49c82b5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:17:58 - INFO - [705229ca-6883-4f4e-be12-4d8ce49c82b5] 30 frames saved to temp_videos/705229ca-6883-4f4e-be12-4d8ce49c82b5 +2025-08-19 01:17:58 - INFO - Prompt token length: 2276 +2025-08-19 01:18:04 - INFO - Tokens per second: 15.02481834834144, Peak GPU memory MB: 4498.375 +2025-08-19 01:18:04 - INFO - [705229ca-6883-4f4e-be12-4d8ce49c82b5] Inference time: 11.40 seconds, CPU usage: 60.1%, CPU core utilization: [52.9, 95.2, 46.1, 46.1] +2025-08-19 01:18:04 - INFO - [705229ca-6883-4f4e-be12-4d8ce49c82b5] Cleaned up temporary file: temp_videos/705229ca-6883-4f4e-be12-4d8ce49c82b5.mp4 +2025-08-19 01:18:04 - INFO - [705229ca-6883-4f4e-be12-4d8ce49c82b5] Cleaned up temporary frame directory: temp_videos/705229ca-6883-4f4e-be12-4d8ce49c82b5 +2025-08-19 01:18:04 - INFO - [19069313-f58e-41ef-883d-404fe749dc38] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_032.mp4' +2025-08-19 01:18:04 - INFO - [19069313-f58e-41ef-883d-404fe749dc38] Video saved to temporary file: temp_videos/19069313-f58e-41ef-883d-404fe749dc38.mp4 +2025-08-19 01:18:04 - INFO - [19069313-f58e-41ef-883d-404fe749dc38] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:18:09 - INFO - [19069313-f58e-41ef-883d-404fe749dc38] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:18:09 - INFO - [19069313-f58e-41ef-883d-404fe749dc38] 30 frames saved to temp_videos/19069313-f58e-41ef-883d-404fe749dc38 +2025-08-19 01:18:10 - INFO - Prompt token length: 2276 +2025-08-19 01:18:16 - INFO - Tokens per second: 15.037380994065911, Peak GPU memory MB: 4498.375 +2025-08-19 01:18:16 - INFO - [19069313-f58e-41ef-883d-404fe749dc38] Inference time: 12.38 seconds, CPU usage: 59.4%, CPU core utilization: [45.0, 68.2, 51.4, 73.2] +2025-08-19 01:18:16 - INFO - [19069313-f58e-41ef-883d-404fe749dc38] Cleaned up temporary file: temp_videos/19069313-f58e-41ef-883d-404fe749dc38.mp4 +2025-08-19 01:18:16 - INFO - [19069313-f58e-41ef-883d-404fe749dc38] Cleaned up temporary frame directory: temp_videos/19069313-f58e-41ef-883d-404fe749dc38 +2025-08-19 01:18:16 - INFO - [3088d501-a1aa-4822-bdb3-2c72c67494e0] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_033.mp4' +2025-08-19 01:18:16 - INFO - [3088d501-a1aa-4822-bdb3-2c72c67494e0] Video saved to temporary file: temp_videos/3088d501-a1aa-4822-bdb3-2c72c67494e0.mp4 +2025-08-19 01:18:16 - INFO - [3088d501-a1aa-4822-bdb3-2c72c67494e0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:18:21 - INFO - [3088d501-a1aa-4822-bdb3-2c72c67494e0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:18:21 - INFO - [3088d501-a1aa-4822-bdb3-2c72c67494e0] 30 frames saved to temp_videos/3088d501-a1aa-4822-bdb3-2c72c67494e0 +2025-08-19 01:18:21 - INFO - Prompt token length: 2276 +2025-08-19 01:18:27 - INFO - Tokens per second: 15.003906375038524, Peak GPU memory MB: 4498.375 +2025-08-19 01:18:27 - INFO - [3088d501-a1aa-4822-bdb3-2c72c67494e0] Inference time: 10.86 seconds, CPU usage: 57.5%, CPU core utilization: [79.3, 63.3, 43.3, 44.1] +2025-08-19 01:18:27 - INFO - [3088d501-a1aa-4822-bdb3-2c72c67494e0] Cleaned up temporary file: temp_videos/3088d501-a1aa-4822-bdb3-2c72c67494e0.mp4 +2025-08-19 01:18:27 - INFO - [3088d501-a1aa-4822-bdb3-2c72c67494e0] Cleaned up temporary frame directory: temp_videos/3088d501-a1aa-4822-bdb3-2c72c67494e0 +2025-08-19 01:18:27 - INFO - [fcf56a1e-6109-455c-82c3-fb3ec2c82031] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_034.mp4' +2025-08-19 01:18:27 - INFO - [fcf56a1e-6109-455c-82c3-fb3ec2c82031] Video saved to temporary file: temp_videos/fcf56a1e-6109-455c-82c3-fb3ec2c82031.mp4 +2025-08-19 01:18:27 - INFO - [fcf56a1e-6109-455c-82c3-fb3ec2c82031] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:18:32 - INFO - [fcf56a1e-6109-455c-82c3-fb3ec2c82031] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:18:32 - INFO - [fcf56a1e-6109-455c-82c3-fb3ec2c82031] 30 frames saved to temp_videos/fcf56a1e-6109-455c-82c3-fb3ec2c82031 +2025-08-19 01:18:33 - INFO - Prompt token length: 2276 +2025-08-19 01:18:39 - INFO - Tokens per second: 15.165868358135322, Peak GPU memory MB: 4498.375 +2025-08-19 01:18:39 - INFO - [fcf56a1e-6109-455c-82c3-fb3ec2c82031] Inference time: 11.81 seconds, CPU usage: 57.7%, CPU core utilization: [50.1, 46.5, 87.8, 46.3] +2025-08-19 01:18:39 - INFO - [fcf56a1e-6109-455c-82c3-fb3ec2c82031] Cleaned up temporary file: temp_videos/fcf56a1e-6109-455c-82c3-fb3ec2c82031.mp4 +2025-08-19 01:18:39 - INFO - [fcf56a1e-6109-455c-82c3-fb3ec2c82031] Cleaned up temporary frame directory: temp_videos/fcf56a1e-6109-455c-82c3-fb3ec2c82031 +2025-08-19 01:18:39 - INFO - [3bc6ae1b-7ddc-43e4-93bb-47f9234c8af4] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_035.mp4' +2025-08-19 01:18:39 - INFO - [3bc6ae1b-7ddc-43e4-93bb-47f9234c8af4] Video saved to temporary file: temp_videos/3bc6ae1b-7ddc-43e4-93bb-47f9234c8af4.mp4 +2025-08-19 01:18:39 - INFO - [3bc6ae1b-7ddc-43e4-93bb-47f9234c8af4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:18:44 - INFO - [3bc6ae1b-7ddc-43e4-93bb-47f9234c8af4] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:18:44 - INFO - [3bc6ae1b-7ddc-43e4-93bb-47f9234c8af4] 30 frames saved to temp_videos/3bc6ae1b-7ddc-43e4-93bb-47f9234c8af4 +2025-08-19 01:18:44 - INFO - Prompt token length: 2276 +2025-08-19 01:18:53 - INFO - Tokens per second: 15.110408717530474, Peak GPU memory MB: 4498.375 +2025-08-19 01:18:53 - INFO - [3bc6ae1b-7ddc-43e4-93bb-47f9234c8af4] Inference time: 13.95 seconds, CPU usage: 52.1%, CPU core utilization: [72.4, 41.1, 57.4, 37.5] +2025-08-19 01:18:53 - INFO - [3bc6ae1b-7ddc-43e4-93bb-47f9234c8af4] Cleaned up temporary file: temp_videos/3bc6ae1b-7ddc-43e4-93bb-47f9234c8af4.mp4 +2025-08-19 01:18:53 - INFO - [3bc6ae1b-7ddc-43e4-93bb-47f9234c8af4] Cleaned up temporary frame directory: temp_videos/3bc6ae1b-7ddc-43e4-93bb-47f9234c8af4 +2025-08-19 01:18:53 - INFO - [c4928801-e827-4460-884e-95432e149213] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_036.mp4' +2025-08-19 01:18:53 - INFO - [c4928801-e827-4460-884e-95432e149213] Video saved to temporary file: temp_videos/c4928801-e827-4460-884e-95432e149213.mp4 +2025-08-19 01:18:53 - INFO - [c4928801-e827-4460-884e-95432e149213] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:18:58 - INFO - [c4928801-e827-4460-884e-95432e149213] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:18:58 - INFO - [c4928801-e827-4460-884e-95432e149213] 30 frames saved to temp_videos/c4928801-e827-4460-884e-95432e149213 +2025-08-19 01:18:58 - INFO - Prompt token length: 2276 +2025-08-19 01:19:17 - INFO - Tokens per second: 15.223225350422531, Peak GPU memory MB: 4498.375 +2025-08-19 01:19:17 - INFO - [c4928801-e827-4460-884e-95432e149213] Inference time: 24.00 seconds, CPU usage: 43.6%, CPU core utilization: [48.5, 27.3, 71.5, 27.1] +2025-08-19 01:19:17 - INFO - [c4928801-e827-4460-884e-95432e149213] Cleaned up temporary file: temp_videos/c4928801-e827-4460-884e-95432e149213.mp4 +2025-08-19 01:19:17 - INFO - [c4928801-e827-4460-884e-95432e149213] Cleaned up temporary frame directory: temp_videos/c4928801-e827-4460-884e-95432e149213 +2025-08-19 01:19:17 - INFO - [29b97137-1132-49ca-8595-cc79fe37c223] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_037.mp4' +2025-08-19 01:19:17 - INFO - [29b97137-1132-49ca-8595-cc79fe37c223] Video saved to temporary file: temp_videos/29b97137-1132-49ca-8595-cc79fe37c223.mp4 +2025-08-19 01:19:17 - INFO - [29b97137-1132-49ca-8595-cc79fe37c223] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:19:22 - INFO - [29b97137-1132-49ca-8595-cc79fe37c223] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:19:22 - INFO - [29b97137-1132-49ca-8595-cc79fe37c223] 30 frames saved to temp_videos/29b97137-1132-49ca-8595-cc79fe37c223 +2025-08-19 01:19:23 - INFO - Prompt token length: 2276 +2025-08-19 01:19:28 - INFO - Tokens per second: 15.003346181961264, Peak GPU memory MB: 4498.375 +2025-08-19 01:19:28 - INFO - [29b97137-1132-49ca-8595-cc79fe37c223] Inference time: 11.56 seconds, CPU usage: 59.2%, CPU core utilization: [61.4, 70.8, 49.2, 55.3] +2025-08-19 01:19:28 - INFO - [29b97137-1132-49ca-8595-cc79fe37c223] Cleaned up temporary file: temp_videos/29b97137-1132-49ca-8595-cc79fe37c223.mp4 +2025-08-19 01:19:28 - INFO - [29b97137-1132-49ca-8595-cc79fe37c223] Cleaned up temporary frame directory: temp_videos/29b97137-1132-49ca-8595-cc79fe37c223 +2025-08-19 01:19:28 - INFO - [75cf708a-b9a9-4108-8620-48d4fbc3fc3e] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_038.mp4' +2025-08-19 01:19:28 - INFO - [75cf708a-b9a9-4108-8620-48d4fbc3fc3e] Video saved to temporary file: temp_videos/75cf708a-b9a9-4108-8620-48d4fbc3fc3e.mp4 +2025-08-19 01:19:28 - INFO - [75cf708a-b9a9-4108-8620-48d4fbc3fc3e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:19:34 - INFO - [75cf708a-b9a9-4108-8620-48d4fbc3fc3e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:19:34 - INFO - [75cf708a-b9a9-4108-8620-48d4fbc3fc3e] 30 frames saved to temp_videos/75cf708a-b9a9-4108-8620-48d4fbc3fc3e +2025-08-19 01:19:34 - INFO - Prompt token length: 2276 +2025-08-19 01:19:53 - INFO - Tokens per second: 15.172943466192525, Peak GPU memory MB: 4498.375 +2025-08-19 01:19:53 - INFO - [75cf708a-b9a9-4108-8620-48d4fbc3fc3e] Inference time: 24.13 seconds, CPU usage: 43.8%, CPU core utilization: [60.6, 32.6, 31.0, 51.1] +2025-08-19 01:19:53 - INFO - [75cf708a-b9a9-4108-8620-48d4fbc3fc3e] Cleaned up temporary file: temp_videos/75cf708a-b9a9-4108-8620-48d4fbc3fc3e.mp4 +2025-08-19 01:19:53 - INFO - [75cf708a-b9a9-4108-8620-48d4fbc3fc3e] Cleaned up temporary frame directory: temp_videos/75cf708a-b9a9-4108-8620-48d4fbc3fc3e +2025-08-19 01:19:53 - INFO - [35980fa0-3354-488b-835c-5a01ab552515] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_039.mp4' +2025-08-19 01:19:53 - INFO - [35980fa0-3354-488b-835c-5a01ab552515] Video saved to temporary file: temp_videos/35980fa0-3354-488b-835c-5a01ab552515.mp4 +2025-08-19 01:19:53 - INFO - [35980fa0-3354-488b-835c-5a01ab552515] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:19:59 - INFO - [35980fa0-3354-488b-835c-5a01ab552515] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:19:59 - INFO - [35980fa0-3354-488b-835c-5a01ab552515] 30 frames saved to temp_videos/35980fa0-3354-488b-835c-5a01ab552515 +2025-08-19 01:19:59 - INFO - Prompt token length: 2276 +2025-08-19 01:20:07 - INFO - Tokens per second: 15.153695587333297, Peak GPU memory MB: 4498.375 +2025-08-19 01:20:07 - INFO - [35980fa0-3354-488b-835c-5a01ab552515] Inference time: 14.34 seconds, CPU usage: 55.1%, CPU core utilization: [71.6, 43.4, 46.2, 59.1] +2025-08-19 01:20:07 - INFO - [35980fa0-3354-488b-835c-5a01ab552515] Cleaned up temporary file: temp_videos/35980fa0-3354-488b-835c-5a01ab552515.mp4 +2025-08-19 01:20:07 - INFO - [35980fa0-3354-488b-835c-5a01ab552515] Cleaned up temporary frame directory: temp_videos/35980fa0-3354-488b-835c-5a01ab552515 +2025-08-19 01:20:07 - INFO - [3ab145c5-391a-4f26-a59d-6805a2de2e45] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_040.mp4' +2025-08-19 01:20:07 - INFO - [3ab145c5-391a-4f26-a59d-6805a2de2e45] Video saved to temporary file: temp_videos/3ab145c5-391a-4f26-a59d-6805a2de2e45.mp4 +2025-08-19 01:20:07 - INFO - [3ab145c5-391a-4f26-a59d-6805a2de2e45] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:20:12 - INFO - [3ab145c5-391a-4f26-a59d-6805a2de2e45] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:20:12 - INFO - [3ab145c5-391a-4f26-a59d-6805a2de2e45] 30 frames saved to temp_videos/3ab145c5-391a-4f26-a59d-6805a2de2e45 +2025-08-19 01:20:12 - INFO - Prompt token length: 2276 +2025-08-19 01:20:19 - INFO - Tokens per second: 15.232834056432706, Peak GPU memory MB: 4498.375 +2025-08-19 01:20:19 - INFO - [3ab145c5-391a-4f26-a59d-6805a2de2e45] Inference time: 12.45 seconds, CPU usage: 55.1%, CPU core utilization: [65.2, 50.6, 60.2, 44.3] +2025-08-19 01:20:19 - INFO - [3ab145c5-391a-4f26-a59d-6805a2de2e45] Cleaned up temporary file: temp_videos/3ab145c5-391a-4f26-a59d-6805a2de2e45.mp4 +2025-08-19 01:20:19 - INFO - [3ab145c5-391a-4f26-a59d-6805a2de2e45] Cleaned up temporary frame directory: temp_videos/3ab145c5-391a-4f26-a59d-6805a2de2e45 +2025-08-19 01:20:20 - INFO - [3178c774-d234-41c6-a95f-b3abecf50b48] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_041.mp4' +2025-08-19 01:20:20 - INFO - [3178c774-d234-41c6-a95f-b3abecf50b48] Video saved to temporary file: temp_videos/3178c774-d234-41c6-a95f-b3abecf50b48.mp4 +2025-08-19 01:20:20 - INFO - [3178c774-d234-41c6-a95f-b3abecf50b48] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:20:24 - INFO - [3178c774-d234-41c6-a95f-b3abecf50b48] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:20:24 - INFO - [3178c774-d234-41c6-a95f-b3abecf50b48] 30 frames saved to temp_videos/3178c774-d234-41c6-a95f-b3abecf50b48 +2025-08-19 01:20:24 - INFO - Prompt token length: 2276 +2025-08-19 01:20:32 - INFO - Tokens per second: 14.954580894213183, Peak GPU memory MB: 4498.375 +2025-08-19 01:20:32 - INFO - [3178c774-d234-41c6-a95f-b3abecf50b48] Inference time: 12.26 seconds, CPU usage: 52.3%, CPU core utilization: [35.3, 43.6, 44.0, 86.3] +2025-08-19 01:20:32 - INFO - [3178c774-d234-41c6-a95f-b3abecf50b48] Cleaned up temporary file: temp_videos/3178c774-d234-41c6-a95f-b3abecf50b48.mp4 +2025-08-19 01:20:32 - INFO - [3178c774-d234-41c6-a95f-b3abecf50b48] Cleaned up temporary frame directory: temp_videos/3178c774-d234-41c6-a95f-b3abecf50b48 +2025-08-19 01:20:32 - INFO - [adf485bb-b362-4651-9671-94360aec95d2] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_042.mp4' +2025-08-19 01:20:32 - INFO - [adf485bb-b362-4651-9671-94360aec95d2] Video saved to temporary file: temp_videos/adf485bb-b362-4651-9671-94360aec95d2.mp4 +2025-08-19 01:20:32 - INFO - [adf485bb-b362-4651-9671-94360aec95d2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:20:38 - INFO - [adf485bb-b362-4651-9671-94360aec95d2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:20:38 - INFO - [adf485bb-b362-4651-9671-94360aec95d2] 30 frames saved to temp_videos/adf485bb-b362-4651-9671-94360aec95d2 +2025-08-19 01:20:38 - INFO - Prompt token length: 2276 +2025-08-19 01:20:45 - INFO - Tokens per second: 15.159302047347232, Peak GPU memory MB: 4498.375 +2025-08-19 01:20:45 - INFO - [adf485bb-b362-4651-9671-94360aec95d2] Inference time: 13.26 seconds, CPU usage: 57.6%, CPU core utilization: [95.1, 43.6, 43.1, 48.4] +2025-08-19 01:20:45 - INFO - [adf485bb-b362-4651-9671-94360aec95d2] Cleaned up temporary file: temp_videos/adf485bb-b362-4651-9671-94360aec95d2.mp4 +2025-08-19 01:20:45 - INFO - [adf485bb-b362-4651-9671-94360aec95d2] Cleaned up temporary frame directory: temp_videos/adf485bb-b362-4651-9671-94360aec95d2 +2025-08-19 01:20:45 - INFO - [8c236363-c6e0-45bf-abb2-1397f546385a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_043.mp4' +2025-08-19 01:20:45 - INFO - [8c236363-c6e0-45bf-abb2-1397f546385a] Video saved to temporary file: temp_videos/8c236363-c6e0-45bf-abb2-1397f546385a.mp4 +2025-08-19 01:20:45 - INFO - [8c236363-c6e0-45bf-abb2-1397f546385a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:20:51 - INFO - [8c236363-c6e0-45bf-abb2-1397f546385a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:20:51 - INFO - [8c236363-c6e0-45bf-abb2-1397f546385a] 30 frames saved to temp_videos/8c236363-c6e0-45bf-abb2-1397f546385a +2025-08-19 01:20:51 - INFO - Prompt token length: 2276 +2025-08-19 01:20:58 - INFO - Tokens per second: 15.018958222653303, Peak GPU memory MB: 4498.375 +2025-08-19 01:20:58 - INFO - [8c236363-c6e0-45bf-abb2-1397f546385a] Inference time: 12.99 seconds, CPU usage: 56.9%, CPU core utilization: [46.8, 92.5, 43.2, 45.4] +2025-08-19 01:20:58 - INFO - [8c236363-c6e0-45bf-abb2-1397f546385a] Cleaned up temporary file: temp_videos/8c236363-c6e0-45bf-abb2-1397f546385a.mp4 +2025-08-19 01:20:58 - INFO - [8c236363-c6e0-45bf-abb2-1397f546385a] Cleaned up temporary frame directory: temp_videos/8c236363-c6e0-45bf-abb2-1397f546385a +2025-08-19 01:20:58 - INFO - [dc3386cb-078e-4620-925b-8c8aa454ecb1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_044.mp4' +2025-08-19 01:20:58 - INFO - [dc3386cb-078e-4620-925b-8c8aa454ecb1] Video saved to temporary file: temp_videos/dc3386cb-078e-4620-925b-8c8aa454ecb1.mp4 +2025-08-19 01:20:58 - INFO - [dc3386cb-078e-4620-925b-8c8aa454ecb1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:21:03 - INFO - [dc3386cb-078e-4620-925b-8c8aa454ecb1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:21:03 - INFO - [dc3386cb-078e-4620-925b-8c8aa454ecb1] 30 frames saved to temp_videos/dc3386cb-078e-4620-925b-8c8aa454ecb1 +2025-08-19 01:21:03 - INFO - Prompt token length: 2276 +2025-08-19 01:21:12 - INFO - Tokens per second: 15.125037916461388, Peak GPU memory MB: 4498.375 +2025-08-19 01:21:12 - INFO - [dc3386cb-078e-4620-925b-8c8aa454ecb1] Inference time: 13.79 seconds, CPU usage: 51.8%, CPU core utilization: [38.0, 78.6, 38.2, 52.3] +2025-08-19 01:21:12 - INFO - [dc3386cb-078e-4620-925b-8c8aa454ecb1] Cleaned up temporary file: temp_videos/dc3386cb-078e-4620-925b-8c8aa454ecb1.mp4 +2025-08-19 01:21:12 - INFO - [dc3386cb-078e-4620-925b-8c8aa454ecb1] Cleaned up temporary frame directory: temp_videos/dc3386cb-078e-4620-925b-8c8aa454ecb1 +2025-08-19 01:21:12 - INFO - [7eea5b82-bfe1-43bd-b621-3efa4b76d979] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_045.mp4' +2025-08-19 01:21:12 - INFO - [7eea5b82-bfe1-43bd-b621-3efa4b76d979] Video saved to temporary file: temp_videos/7eea5b82-bfe1-43bd-b621-3efa4b76d979.mp4 +2025-08-19 01:21:12 - INFO - [7eea5b82-bfe1-43bd-b621-3efa4b76d979] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:21:17 - INFO - [7eea5b82-bfe1-43bd-b621-3efa4b76d979] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:21:17 - INFO - [7eea5b82-bfe1-43bd-b621-3efa4b76d979] 30 frames saved to temp_videos/7eea5b82-bfe1-43bd-b621-3efa4b76d979 +2025-08-19 01:21:17 - INFO - Prompt token length: 2276 +2025-08-19 01:21:24 - INFO - Tokens per second: 15.253951389421339, Peak GPU memory MB: 4498.375 +2025-08-19 01:21:24 - INFO - [7eea5b82-bfe1-43bd-b621-3efa4b76d979] Inference time: 12.07 seconds, CPU usage: 55.0%, CPU core utilization: [59.4, 41.3, 76.1, 43.1] +2025-08-19 01:21:24 - INFO - [7eea5b82-bfe1-43bd-b621-3efa4b76d979] Cleaned up temporary file: temp_videos/7eea5b82-bfe1-43bd-b621-3efa4b76d979.mp4 +2025-08-19 01:21:24 - INFO - [7eea5b82-bfe1-43bd-b621-3efa4b76d979] Cleaned up temporary frame directory: temp_videos/7eea5b82-bfe1-43bd-b621-3efa4b76d979 +2025-08-19 01:21:24 - INFO - [75167769-2224-4e52-ac11-6bc010913938] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_046.mp4' +2025-08-19 01:21:24 - INFO - [75167769-2224-4e52-ac11-6bc010913938] Video saved to temporary file: temp_videos/75167769-2224-4e52-ac11-6bc010913938.mp4 +2025-08-19 01:21:24 - INFO - [75167769-2224-4e52-ac11-6bc010913938] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:21:30 - INFO - [75167769-2224-4e52-ac11-6bc010913938] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:21:30 - INFO - [75167769-2224-4e52-ac11-6bc010913938] 30 frames saved to temp_videos/75167769-2224-4e52-ac11-6bc010913938 +2025-08-19 01:21:30 - INFO - Prompt token length: 2276 +2025-08-19 01:21:36 - INFO - Tokens per second: 15.184556314585924, Peak GPU memory MB: 4498.375 +2025-08-19 01:21:36 - INFO - [75167769-2224-4e52-ac11-6bc010913938] Inference time: 12.24 seconds, CPU usage: 58.4%, CPU core utilization: [59.3, 45.4, 80.2, 48.6] +2025-08-19 01:21:36 - INFO - [75167769-2224-4e52-ac11-6bc010913938] Cleaned up temporary file: temp_videos/75167769-2224-4e52-ac11-6bc010913938.mp4 +2025-08-19 01:21:36 - INFO - [75167769-2224-4e52-ac11-6bc010913938] Cleaned up temporary frame directory: temp_videos/75167769-2224-4e52-ac11-6bc010913938 +2025-08-19 01:21:36 - INFO - [1f4f3149-dfb4-4c1e-9459-8fa0f9c65803] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_047.mp4' +2025-08-19 01:21:36 - INFO - [1f4f3149-dfb4-4c1e-9459-8fa0f9c65803] Video saved to temporary file: temp_videos/1f4f3149-dfb4-4c1e-9459-8fa0f9c65803.mp4 +2025-08-19 01:21:36 - INFO - [1f4f3149-dfb4-4c1e-9459-8fa0f9c65803] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:21:41 - INFO - [1f4f3149-dfb4-4c1e-9459-8fa0f9c65803] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:21:41 - INFO - [1f4f3149-dfb4-4c1e-9459-8fa0f9c65803] 30 frames saved to temp_videos/1f4f3149-dfb4-4c1e-9459-8fa0f9c65803 +2025-08-19 01:21:41 - INFO - Prompt token length: 2276 +2025-08-19 01:22:00 - INFO - Tokens per second: 15.207603805981988, Peak GPU memory MB: 4498.375 +2025-08-19 01:22:00 - INFO - [1f4f3149-dfb4-4c1e-9459-8fa0f9c65803] Inference time: 23.66 seconds, CPU usage: 43.6%, CPU core utilization: [36.5, 29.1, 84.5, 24.3] +2025-08-19 01:22:00 - INFO - [1f4f3149-dfb4-4c1e-9459-8fa0f9c65803] Cleaned up temporary file: temp_videos/1f4f3149-dfb4-4c1e-9459-8fa0f9c65803.mp4 +2025-08-19 01:22:00 - INFO - [1f4f3149-dfb4-4c1e-9459-8fa0f9c65803] Cleaned up temporary frame directory: temp_videos/1f4f3149-dfb4-4c1e-9459-8fa0f9c65803 +2025-08-19 01:22:00 - INFO - [ccc1995c-43f2-4e3d-8e43-b1b264592a8b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_048.mp4' +2025-08-19 01:22:00 - INFO - [ccc1995c-43f2-4e3d-8e43-b1b264592a8b] Video saved to temporary file: temp_videos/ccc1995c-43f2-4e3d-8e43-b1b264592a8b.mp4 +2025-08-19 01:22:00 - INFO - [ccc1995c-43f2-4e3d-8e43-b1b264592a8b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:22:05 - INFO - [ccc1995c-43f2-4e3d-8e43-b1b264592a8b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:22:05 - INFO - [ccc1995c-43f2-4e3d-8e43-b1b264592a8b] 30 frames saved to temp_videos/ccc1995c-43f2-4e3d-8e43-b1b264592a8b +2025-08-19 01:22:05 - INFO - Prompt token length: 2276 +2025-08-19 01:22:12 - INFO - Tokens per second: 15.161178501396146, Peak GPU memory MB: 4498.375 +2025-08-19 01:22:12 - INFO - [ccc1995c-43f2-4e3d-8e43-b1b264592a8b] Inference time: 12.01 seconds, CPU usage: 56.5%, CPU core utilization: [45.3, 88.5, 48.9, 43.0] +2025-08-19 01:22:12 - INFO - [ccc1995c-43f2-4e3d-8e43-b1b264592a8b] Cleaned up temporary file: temp_videos/ccc1995c-43f2-4e3d-8e43-b1b264592a8b.mp4 +2025-08-19 01:22:12 - INFO - [ccc1995c-43f2-4e3d-8e43-b1b264592a8b] Cleaned up temporary frame directory: temp_videos/ccc1995c-43f2-4e3d-8e43-b1b264592a8b +2025-08-19 01:22:12 - INFO - [62b73819-7c5e-4b0d-9ba7-586bc1aced61] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_049.mp4' +2025-08-19 01:22:12 - INFO - [62b73819-7c5e-4b0d-9ba7-586bc1aced61] Video saved to temporary file: temp_videos/62b73819-7c5e-4b0d-9ba7-586bc1aced61.mp4 +2025-08-19 01:22:12 - INFO - [62b73819-7c5e-4b0d-9ba7-586bc1aced61] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:22:17 - INFO - [62b73819-7c5e-4b0d-9ba7-586bc1aced61] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:22:17 - INFO - [62b73819-7c5e-4b0d-9ba7-586bc1aced61] 30 frames saved to temp_videos/62b73819-7c5e-4b0d-9ba7-586bc1aced61 +2025-08-19 01:22:17 - INFO - Prompt token length: 2276 +2025-08-19 01:22:24 - INFO - Tokens per second: 15.219667630599881, Peak GPU memory MB: 4498.375 +2025-08-19 01:22:24 - INFO - [62b73819-7c5e-4b0d-9ba7-586bc1aced61] Inference time: 11.62 seconds, CPU usage: 55.2%, CPU core utilization: [59.3, 41.7, 75.7, 44.2] +2025-08-19 01:22:24 - INFO - [62b73819-7c5e-4b0d-9ba7-586bc1aced61] Cleaned up temporary file: temp_videos/62b73819-7c5e-4b0d-9ba7-586bc1aced61.mp4 +2025-08-19 01:22:24 - INFO - [62b73819-7c5e-4b0d-9ba7-586bc1aced61] Cleaned up temporary frame directory: temp_videos/62b73819-7c5e-4b0d-9ba7-586bc1aced61 +2025-08-19 01:22:24 - INFO - [f6fc2d05-cab7-41d7-8b01-b6ecd445daca] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_050.mp4' +2025-08-19 01:22:24 - INFO - [f6fc2d05-cab7-41d7-8b01-b6ecd445daca] Video saved to temporary file: temp_videos/f6fc2d05-cab7-41d7-8b01-b6ecd445daca.mp4 +2025-08-19 01:22:24 - INFO - [f6fc2d05-cab7-41d7-8b01-b6ecd445daca] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:22:29 - INFO - [f6fc2d05-cab7-41d7-8b01-b6ecd445daca] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:22:29 - INFO - [f6fc2d05-cab7-41d7-8b01-b6ecd445daca] 30 frames saved to temp_videos/f6fc2d05-cab7-41d7-8b01-b6ecd445daca +2025-08-19 01:22:29 - INFO - Prompt token length: 2276 +2025-08-19 01:22:37 - INFO - Tokens per second: 15.145118635028181, Peak GPU memory MB: 4498.375 +2025-08-19 01:22:37 - INFO - [f6fc2d05-cab7-41d7-8b01-b6ecd445daca] Inference time: 13.58 seconds, CPU usage: 53.7%, CPU core utilization: [48.5, 40.6, 83.8, 41.8] +2025-08-19 01:22:37 - INFO - [f6fc2d05-cab7-41d7-8b01-b6ecd445daca] Cleaned up temporary file: temp_videos/f6fc2d05-cab7-41d7-8b01-b6ecd445daca.mp4 +2025-08-19 01:22:37 - INFO - [f6fc2d05-cab7-41d7-8b01-b6ecd445daca] Cleaned up temporary frame directory: temp_videos/f6fc2d05-cab7-41d7-8b01-b6ecd445daca +2025-08-19 01:22:37 - INFO - [647cb940-17d0-421a-850e-3f73affbf27d] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_051.mp4' +2025-08-19 01:22:37 - INFO - [647cb940-17d0-421a-850e-3f73affbf27d] Video saved to temporary file: temp_videos/647cb940-17d0-421a-850e-3f73affbf27d.mp4 +2025-08-19 01:22:37 - INFO - [647cb940-17d0-421a-850e-3f73affbf27d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:22:43 - INFO - [647cb940-17d0-421a-850e-3f73affbf27d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:22:43 - INFO - [647cb940-17d0-421a-850e-3f73affbf27d] 30 frames saved to temp_videos/647cb940-17d0-421a-850e-3f73affbf27d +2025-08-19 01:22:43 - INFO - Prompt token length: 2276 +2025-08-19 01:23:02 - INFO - Tokens per second: 15.082250533041343, Peak GPU memory MB: 4498.375 +2025-08-19 01:23:02 - INFO - [647cb940-17d0-421a-850e-3f73affbf27d] Inference time: 24.17 seconds, CPU usage: 43.9%, CPU core utilization: [38.6, 29.1, 29.3, 78.5] +2025-08-19 01:23:02 - INFO - [647cb940-17d0-421a-850e-3f73affbf27d] Cleaned up temporary file: temp_videos/647cb940-17d0-421a-850e-3f73affbf27d.mp4 +2025-08-19 01:23:02 - INFO - [647cb940-17d0-421a-850e-3f73affbf27d] Cleaned up temporary frame directory: temp_videos/647cb940-17d0-421a-850e-3f73affbf27d +2025-08-19 01:23:02 - INFO - [ffb5ea51-a797-4789-a321-dc7e96c3e735] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_052.mp4' +2025-08-19 01:23:02 - INFO - [ffb5ea51-a797-4789-a321-dc7e96c3e735] Video saved to temporary file: temp_videos/ffb5ea51-a797-4789-a321-dc7e96c3e735.mp4 +2025-08-19 01:23:02 - INFO - [ffb5ea51-a797-4789-a321-dc7e96c3e735] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:23:06 - INFO - [ffb5ea51-a797-4789-a321-dc7e96c3e735] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:23:06 - INFO - [ffb5ea51-a797-4789-a321-dc7e96c3e735] 30 frames saved to temp_videos/ffb5ea51-a797-4789-a321-dc7e96c3e735 +2025-08-19 01:23:07 - INFO - Prompt token length: 2276 +2025-08-19 01:23:25 - INFO - Tokens per second: 15.177486403943025, Peak GPU memory MB: 4498.375 +2025-08-19 01:23:25 - INFO - [ffb5ea51-a797-4789-a321-dc7e96c3e735] Inference time: 23.57 seconds, CPU usage: 42.5%, CPU core utilization: [42.4, 25.4, 52.4, 49.8] +2025-08-19 01:23:25 - INFO - [ffb5ea51-a797-4789-a321-dc7e96c3e735] Cleaned up temporary file: temp_videos/ffb5ea51-a797-4789-a321-dc7e96c3e735.mp4 +2025-08-19 01:23:25 - INFO - [ffb5ea51-a797-4789-a321-dc7e96c3e735] Cleaned up temporary frame directory: temp_videos/ffb5ea51-a797-4789-a321-dc7e96c3e735 +2025-08-19 01:23:25 - INFO - [3e552b6f-3b30-4195-a941-9b0420d0da5c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_053.mp4' +2025-08-19 01:23:25 - INFO - [3e552b6f-3b30-4195-a941-9b0420d0da5c] Video saved to temporary file: temp_videos/3e552b6f-3b30-4195-a941-9b0420d0da5c.mp4 +2025-08-19 01:23:25 - INFO - [3e552b6f-3b30-4195-a941-9b0420d0da5c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:23:30 - INFO - [3e552b6f-3b30-4195-a941-9b0420d0da5c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:23:30 - INFO - [3e552b6f-3b30-4195-a941-9b0420d0da5c] 30 frames saved to temp_videos/3e552b6f-3b30-4195-a941-9b0420d0da5c +2025-08-19 01:23:31 - INFO - Prompt token length: 2276 +2025-08-19 01:23:49 - INFO - Tokens per second: 15.081825570037651, Peak GPU memory MB: 4498.375 +2025-08-19 01:23:49 - INFO - [3e552b6f-3b30-4195-a941-9b0420d0da5c] Inference time: 24.05 seconds, CPU usage: 43.4%, CPU core utilization: [71.9, 31.3, 37.4, 33.1] +2025-08-19 01:23:49 - INFO - [3e552b6f-3b30-4195-a941-9b0420d0da5c] Cleaned up temporary file: temp_videos/3e552b6f-3b30-4195-a941-9b0420d0da5c.mp4 +2025-08-19 01:23:49 - INFO - [3e552b6f-3b30-4195-a941-9b0420d0da5c] Cleaned up temporary frame directory: temp_videos/3e552b6f-3b30-4195-a941-9b0420d0da5c +2025-08-19 01:23:49 - INFO - [2e96b4af-fca9-4a38-b31d-c5f542f08992] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_054.mp4' +2025-08-19 01:23:49 - INFO - [2e96b4af-fca9-4a38-b31d-c5f542f08992] Video saved to temporary file: temp_videos/2e96b4af-fca9-4a38-b31d-c5f542f08992.mp4 +2025-08-19 01:23:49 - INFO - [2e96b4af-fca9-4a38-b31d-c5f542f08992] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:23:54 - INFO - [2e96b4af-fca9-4a38-b31d-c5f542f08992] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:23:54 - INFO - [2e96b4af-fca9-4a38-b31d-c5f542f08992] 30 frames saved to temp_videos/2e96b4af-fca9-4a38-b31d-c5f542f08992 +2025-08-19 01:23:55 - INFO - Prompt token length: 2276 +2025-08-19 01:24:03 - INFO - Tokens per second: 15.079603719843547, Peak GPU memory MB: 4498.375 +2025-08-19 01:24:03 - INFO - [2e96b4af-fca9-4a38-b31d-c5f542f08992] Inference time: 13.95 seconds, CPU usage: 53.3%, CPU core utilization: [41.2, 42.7, 90.3, 39.0] +2025-08-19 01:24:03 - INFO - [2e96b4af-fca9-4a38-b31d-c5f542f08992] Cleaned up temporary file: temp_videos/2e96b4af-fca9-4a38-b31d-c5f542f08992.mp4 +2025-08-19 01:24:03 - INFO - [2e96b4af-fca9-4a38-b31d-c5f542f08992] Cleaned up temporary frame directory: temp_videos/2e96b4af-fca9-4a38-b31d-c5f542f08992 +2025-08-19 01:24:03 - INFO - [86df303a-94ee-4380-bfbf-b1cad948d06d] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_055.mp4' +2025-08-19 01:24:03 - INFO - [86df303a-94ee-4380-bfbf-b1cad948d06d] Video saved to temporary file: temp_videos/86df303a-94ee-4380-bfbf-b1cad948d06d.mp4 +2025-08-19 01:24:03 - INFO - [86df303a-94ee-4380-bfbf-b1cad948d06d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:24:09 - INFO - [86df303a-94ee-4380-bfbf-b1cad948d06d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:24:09 - INFO - [86df303a-94ee-4380-bfbf-b1cad948d06d] 30 frames saved to temp_videos/86df303a-94ee-4380-bfbf-b1cad948d06d +2025-08-19 01:24:09 - INFO - Prompt token length: 2276 +2025-08-19 01:24:16 - INFO - Tokens per second: 14.890565837651767, Peak GPU memory MB: 4498.375 +2025-08-19 01:24:16 - INFO - [86df303a-94ee-4380-bfbf-b1cad948d06d] Inference time: 12.89 seconds, CPU usage: 58.3%, CPU core utilization: [94.6, 46.2, 44.9, 47.3] +2025-08-19 01:24:16 - INFO - [86df303a-94ee-4380-bfbf-b1cad948d06d] Cleaned up temporary file: temp_videos/86df303a-94ee-4380-bfbf-b1cad948d06d.mp4 +2025-08-19 01:24:16 - INFO - [86df303a-94ee-4380-bfbf-b1cad948d06d] Cleaned up temporary frame directory: temp_videos/86df303a-94ee-4380-bfbf-b1cad948d06d +2025-08-19 01:24:16 - INFO - [b7571bd3-f3e2-49c0-8247-86b51483ad85] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_056.mp4' +2025-08-19 01:24:16 - INFO - [b7571bd3-f3e2-49c0-8247-86b51483ad85] Video saved to temporary file: temp_videos/b7571bd3-f3e2-49c0-8247-86b51483ad85.mp4 +2025-08-19 01:24:16 - INFO - [b7571bd3-f3e2-49c0-8247-86b51483ad85] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:24:21 - INFO - [b7571bd3-f3e2-49c0-8247-86b51483ad85] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:24:21 - INFO - [b7571bd3-f3e2-49c0-8247-86b51483ad85] 30 frames saved to temp_videos/b7571bd3-f3e2-49c0-8247-86b51483ad85 +2025-08-19 01:24:22 - INFO - Prompt token length: 2276 +2025-08-19 01:24:40 - INFO - Tokens per second: 15.160021425103128, Peak GPU memory MB: 4498.375 +2025-08-19 01:24:40 - INFO - [b7571bd3-f3e2-49c0-8247-86b51483ad85] Inference time: 23.87 seconds, CPU usage: 43.2%, CPU core utilization: [44.4, 33.6, 36.8, 57.8] +2025-08-19 01:24:40 - INFO - [b7571bd3-f3e2-49c0-8247-86b51483ad85] Cleaned up temporary file: temp_videos/b7571bd3-f3e2-49c0-8247-86b51483ad85.mp4 +2025-08-19 01:24:40 - INFO - [b7571bd3-f3e2-49c0-8247-86b51483ad85] Cleaned up temporary frame directory: temp_videos/b7571bd3-f3e2-49c0-8247-86b51483ad85 +2025-08-19 01:24:40 - INFO - [935f6a70-b3f0-4724-b23d-5478335314f5] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_057.mp4' +2025-08-19 01:24:40 - INFO - [935f6a70-b3f0-4724-b23d-5478335314f5] Video saved to temporary file: temp_videos/935f6a70-b3f0-4724-b23d-5478335314f5.mp4 +2025-08-19 01:24:40 - INFO - [935f6a70-b3f0-4724-b23d-5478335314f5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:24:45 - INFO - [935f6a70-b3f0-4724-b23d-5478335314f5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:24:45 - INFO - [935f6a70-b3f0-4724-b23d-5478335314f5] 30 frames saved to temp_videos/935f6a70-b3f0-4724-b23d-5478335314f5 +2025-08-19 01:24:46 - INFO - Prompt token length: 2276 +2025-08-19 01:25:04 - INFO - Tokens per second: 15.034732126920906, Peak GPU memory MB: 4498.375 +2025-08-19 01:25:04 - INFO - [935f6a70-b3f0-4724-b23d-5478335314f5] Inference time: 24.19 seconds, CPU usage: 43.5%, CPU core utilization: [28.4, 28.0, 29.3, 88.6] +2025-08-19 01:25:04 - INFO - [935f6a70-b3f0-4724-b23d-5478335314f5] Cleaned up temporary file: temp_videos/935f6a70-b3f0-4724-b23d-5478335314f5.mp4 +2025-08-19 01:25:04 - INFO - [935f6a70-b3f0-4724-b23d-5478335314f5] Cleaned up temporary frame directory: temp_videos/935f6a70-b3f0-4724-b23d-5478335314f5 +2025-08-19 01:25:04 - INFO - [853fa589-d5f7-47dd-a569-fff34bbbb884] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_058.mp4' +2025-08-19 01:25:04 - INFO - [853fa589-d5f7-47dd-a569-fff34bbbb884] Video saved to temporary file: temp_videos/853fa589-d5f7-47dd-a569-fff34bbbb884.mp4 +2025-08-19 01:25:04 - INFO - [853fa589-d5f7-47dd-a569-fff34bbbb884] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:25:10 - INFO - [853fa589-d5f7-47dd-a569-fff34bbbb884] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:25:10 - INFO - [853fa589-d5f7-47dd-a569-fff34bbbb884] 30 frames saved to temp_videos/853fa589-d5f7-47dd-a569-fff34bbbb884 +2025-08-19 01:25:10 - INFO - Prompt token length: 2276 +2025-08-19 01:25:19 - INFO - Tokens per second: 15.074772750512258, Peak GPU memory MB: 4498.375 +2025-08-19 01:25:19 - INFO - [853fa589-d5f7-47dd-a569-fff34bbbb884] Inference time: 14.80 seconds, CPU usage: 54.2%, CPU core utilization: [38.8, 59.8, 44.2, 74.0] +2025-08-19 01:25:19 - INFO - [853fa589-d5f7-47dd-a569-fff34bbbb884] Cleaned up temporary file: temp_videos/853fa589-d5f7-47dd-a569-fff34bbbb884.mp4 +2025-08-19 01:25:19 - INFO - [853fa589-d5f7-47dd-a569-fff34bbbb884] Cleaned up temporary frame directory: temp_videos/853fa589-d5f7-47dd-a569-fff34bbbb884 +2025-08-19 01:25:19 - INFO - [df6ec7ec-f8e6-4cc9-8870-f6be25669ac4] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_059.mp4' +2025-08-19 01:25:19 - INFO - [df6ec7ec-f8e6-4cc9-8870-f6be25669ac4] Video saved to temporary file: temp_videos/df6ec7ec-f8e6-4cc9-8870-f6be25669ac4.mp4 +2025-08-19 01:25:19 - INFO - [df6ec7ec-f8e6-4cc9-8870-f6be25669ac4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:25:25 - INFO - [df6ec7ec-f8e6-4cc9-8870-f6be25669ac4] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:25:25 - INFO - [df6ec7ec-f8e6-4cc9-8870-f6be25669ac4] 30 frames saved to temp_videos/df6ec7ec-f8e6-4cc9-8870-f6be25669ac4 +2025-08-19 01:25:25 - INFO - Prompt token length: 2276 +2025-08-19 01:25:31 - INFO - Tokens per second: 15.142981091351468, Peak GPU memory MB: 4498.375 +2025-08-19 01:25:31 - INFO - [df6ec7ec-f8e6-4cc9-8870-f6be25669ac4] Inference time: 12.09 seconds, CPU usage: 60.1%, CPU core utilization: [65.5, 47.9, 76.2, 50.7] +2025-08-19 01:25:31 - INFO - [df6ec7ec-f8e6-4cc9-8870-f6be25669ac4] Cleaned up temporary file: temp_videos/df6ec7ec-f8e6-4cc9-8870-f6be25669ac4.mp4 +2025-08-19 01:25:31 - INFO - [df6ec7ec-f8e6-4cc9-8870-f6be25669ac4] Cleaned up temporary frame directory: temp_videos/df6ec7ec-f8e6-4cc9-8870-f6be25669ac4 +2025-08-19 01:25:31 - INFO - [49e6e75d-3336-4039-8bc2-f11d5e0f1045] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_060.mp4' +2025-08-19 01:25:31 - INFO - [49e6e75d-3336-4039-8bc2-f11d5e0f1045] Video saved to temporary file: temp_videos/49e6e75d-3336-4039-8bc2-f11d5e0f1045.mp4 +2025-08-19 01:25:31 - INFO - [49e6e75d-3336-4039-8bc2-f11d5e0f1045] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:25:37 - INFO - [49e6e75d-3336-4039-8bc2-f11d5e0f1045] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:25:37 - INFO - [49e6e75d-3336-4039-8bc2-f11d5e0f1045] 30 frames saved to temp_videos/49e6e75d-3336-4039-8bc2-f11d5e0f1045 +2025-08-19 01:25:37 - INFO - Prompt token length: 2276 +2025-08-19 01:25:43 - INFO - Tokens per second: 15.20735275491903, Peak GPU memory MB: 4498.375 +2025-08-19 01:25:43 - INFO - [49e6e75d-3336-4039-8bc2-f11d5e0f1045] Inference time: 12.05 seconds, CPU usage: 57.3%, CPU core utilization: [69.3, 62.2, 49.5, 48.2] +2025-08-19 01:25:43 - INFO - [49e6e75d-3336-4039-8bc2-f11d5e0f1045] Cleaned up temporary file: temp_videos/49e6e75d-3336-4039-8bc2-f11d5e0f1045.mp4 +2025-08-19 01:25:43 - INFO - [49e6e75d-3336-4039-8bc2-f11d5e0f1045] Cleaned up temporary frame directory: temp_videos/49e6e75d-3336-4039-8bc2-f11d5e0f1045 +2025-08-19 01:25:43 - INFO - [5bbc7266-2c45-4c0f-8b7f-556353194e4b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_061.mp4' +2025-08-19 01:25:43 - INFO - [5bbc7266-2c45-4c0f-8b7f-556353194e4b] Video saved to temporary file: temp_videos/5bbc7266-2c45-4c0f-8b7f-556353194e4b.mp4 +2025-08-19 01:25:43 - INFO - [5bbc7266-2c45-4c0f-8b7f-556353194e4b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:25:49 - INFO - [5bbc7266-2c45-4c0f-8b7f-556353194e4b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:25:49 - INFO - [5bbc7266-2c45-4c0f-8b7f-556353194e4b] 30 frames saved to temp_videos/5bbc7266-2c45-4c0f-8b7f-556353194e4b +2025-08-19 01:25:49 - INFO - Prompt token length: 2276 +2025-08-19 01:25:59 - INFO - Tokens per second: 15.116573969074487, Peak GPU memory MB: 4498.375 +2025-08-19 01:25:59 - INFO - [5bbc7266-2c45-4c0f-8b7f-556353194e4b] Inference time: 15.45 seconds, CPU usage: 50.9%, CPU core utilization: [48.3, 45.5, 75.4, 34.2] +2025-08-19 01:25:59 - INFO - [5bbc7266-2c45-4c0f-8b7f-556353194e4b] Cleaned up temporary file: temp_videos/5bbc7266-2c45-4c0f-8b7f-556353194e4b.mp4 +2025-08-19 01:25:59 - INFO - [5bbc7266-2c45-4c0f-8b7f-556353194e4b] Cleaned up temporary frame directory: temp_videos/5bbc7266-2c45-4c0f-8b7f-556353194e4b +2025-08-19 01:25:59 - INFO - [8716bb3c-1826-4e6e-83aa-d5227ba910b1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_062.mp4' +2025-08-19 01:25:59 - INFO - [8716bb3c-1826-4e6e-83aa-d5227ba910b1] Video saved to temporary file: temp_videos/8716bb3c-1826-4e6e-83aa-d5227ba910b1.mp4 +2025-08-19 01:25:59 - INFO - [8716bb3c-1826-4e6e-83aa-d5227ba910b1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:26:05 - INFO - [8716bb3c-1826-4e6e-83aa-d5227ba910b1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:26:05 - INFO - [8716bb3c-1826-4e6e-83aa-d5227ba910b1] 30 frames saved to temp_videos/8716bb3c-1826-4e6e-83aa-d5227ba910b1 +2025-08-19 01:26:05 - INFO - Prompt token length: 2276 +2025-08-19 01:26:13 - INFO - Tokens per second: 15.150096909336943, Peak GPU memory MB: 4498.375 +2025-08-19 01:26:13 - INFO - [8716bb3c-1826-4e6e-83aa-d5227ba910b1] Inference time: 13.58 seconds, CPU usage: 54.8%, CPU core utilization: [46.6, 47.8, 62.0, 62.6] +2025-08-19 01:26:13 - INFO - [8716bb3c-1826-4e6e-83aa-d5227ba910b1] Cleaned up temporary file: temp_videos/8716bb3c-1826-4e6e-83aa-d5227ba910b1.mp4 +2025-08-19 01:26:13 - INFO - [8716bb3c-1826-4e6e-83aa-d5227ba910b1] Cleaned up temporary frame directory: temp_videos/8716bb3c-1826-4e6e-83aa-d5227ba910b1 +2025-08-19 01:26:13 - INFO - [5e484e08-e660-4e51-a3d1-8e9dc7e6ca26] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_063.mp4' +2025-08-19 01:26:13 - INFO - [5e484e08-e660-4e51-a3d1-8e9dc7e6ca26] Video saved to temporary file: temp_videos/5e484e08-e660-4e51-a3d1-8e9dc7e6ca26.mp4 +2025-08-19 01:26:13 - INFO - [5e484e08-e660-4e51-a3d1-8e9dc7e6ca26] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:26:18 - INFO - [5e484e08-e660-4e51-a3d1-8e9dc7e6ca26] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:26:18 - INFO - [5e484e08-e660-4e51-a3d1-8e9dc7e6ca26] 30 frames saved to temp_videos/5e484e08-e660-4e51-a3d1-8e9dc7e6ca26 +2025-08-19 01:26:18 - INFO - Prompt token length: 2276 +2025-08-19 01:26:24 - INFO - Tokens per second: 15.141533992515093, Peak GPU memory MB: 4498.375 +2025-08-19 01:26:24 - INFO - [5e484e08-e660-4e51-a3d1-8e9dc7e6ca26] Inference time: 11.33 seconds, CPU usage: 62.7%, CPU core utilization: [48.2, 55.8, 96.4, 50.4] +2025-08-19 01:26:24 - INFO - [5e484e08-e660-4e51-a3d1-8e9dc7e6ca26] Cleaned up temporary file: temp_videos/5e484e08-e660-4e51-a3d1-8e9dc7e6ca26.mp4 +2025-08-19 01:26:24 - INFO - [5e484e08-e660-4e51-a3d1-8e9dc7e6ca26] Cleaned up temporary frame directory: temp_videos/5e484e08-e660-4e51-a3d1-8e9dc7e6ca26 +2025-08-19 01:26:24 - INFO - [71d177b6-5ed8-4494-86ef-e8088a2e0690] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_064.mp4' +2025-08-19 01:26:24 - INFO - [71d177b6-5ed8-4494-86ef-e8088a2e0690] Video saved to temporary file: temp_videos/71d177b6-5ed8-4494-86ef-e8088a2e0690.mp4 +2025-08-19 01:26:24 - INFO - [71d177b6-5ed8-4494-86ef-e8088a2e0690] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:26:29 - INFO - [71d177b6-5ed8-4494-86ef-e8088a2e0690] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:26:29 - INFO - [71d177b6-5ed8-4494-86ef-e8088a2e0690] 30 frames saved to temp_videos/71d177b6-5ed8-4494-86ef-e8088a2e0690 +2025-08-19 01:26:29 - INFO - Prompt token length: 2276 +2025-08-19 01:26:38 - INFO - Tokens per second: 15.096657909178079, Peak GPU memory MB: 4498.375 +2025-08-19 01:26:38 - INFO - [71d177b6-5ed8-4494-86ef-e8088a2e0690] Inference time: 13.60 seconds, CPU usage: 53.9%, CPU core utilization: [37.1, 42.5, 95.9, 40.1] +2025-08-19 01:26:38 - INFO - [71d177b6-5ed8-4494-86ef-e8088a2e0690] Cleaned up temporary file: temp_videos/71d177b6-5ed8-4494-86ef-e8088a2e0690.mp4 +2025-08-19 01:26:38 - INFO - [71d177b6-5ed8-4494-86ef-e8088a2e0690] Cleaned up temporary frame directory: temp_videos/71d177b6-5ed8-4494-86ef-e8088a2e0690 +2025-08-19 01:26:38 - INFO - [2e2800d4-866f-4129-bf4c-6896240fd4cd] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_065.mp4' +2025-08-19 01:26:38 - INFO - [2e2800d4-866f-4129-bf4c-6896240fd4cd] Video saved to temporary file: temp_videos/2e2800d4-866f-4129-bf4c-6896240fd4cd.mp4 +2025-08-19 01:26:38 - INFO - [2e2800d4-866f-4129-bf4c-6896240fd4cd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:26:43 - INFO - [2e2800d4-866f-4129-bf4c-6896240fd4cd] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:26:43 - INFO - [2e2800d4-866f-4129-bf4c-6896240fd4cd] 30 frames saved to temp_videos/2e2800d4-866f-4129-bf4c-6896240fd4cd +2025-08-19 01:26:43 - INFO - Prompt token length: 2276 +2025-08-19 01:26:50 - INFO - Tokens per second: 14.940171103532032, Peak GPU memory MB: 4498.375 +2025-08-19 01:26:50 - INFO - [2e2800d4-866f-4129-bf4c-6896240fd4cd] Inference time: 12.34 seconds, CPU usage: 56.1%, CPU core utilization: [49.2, 86.1, 48.6, 40.7] +2025-08-19 01:26:50 - INFO - [2e2800d4-866f-4129-bf4c-6896240fd4cd] Cleaned up temporary file: temp_videos/2e2800d4-866f-4129-bf4c-6896240fd4cd.mp4 +2025-08-19 01:26:50 - INFO - [2e2800d4-866f-4129-bf4c-6896240fd4cd] Cleaned up temporary frame directory: temp_videos/2e2800d4-866f-4129-bf4c-6896240fd4cd +2025-08-19 01:26:50 - INFO - [5cfbadaa-3fc7-4e86-ae53-7fa554a394ee] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_066.mp4' +2025-08-19 01:26:50 - INFO - [5cfbadaa-3fc7-4e86-ae53-7fa554a394ee] Video saved to temporary file: temp_videos/5cfbadaa-3fc7-4e86-ae53-7fa554a394ee.mp4 +2025-08-19 01:26:50 - INFO - [5cfbadaa-3fc7-4e86-ae53-7fa554a394ee] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:26:55 - INFO - [5cfbadaa-3fc7-4e86-ae53-7fa554a394ee] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:26:55 - INFO - [5cfbadaa-3fc7-4e86-ae53-7fa554a394ee] 30 frames saved to temp_videos/5cfbadaa-3fc7-4e86-ae53-7fa554a394ee +2025-08-19 01:26:55 - INFO - Prompt token length: 2276 +2025-08-19 01:27:04 - INFO - Tokens per second: 14.904040429165455, Peak GPU memory MB: 4498.375 +2025-08-19 01:27:04 - INFO - [5cfbadaa-3fc7-4e86-ae53-7fa554a394ee] Inference time: 13.78 seconds, CPU usage: 55.9%, CPU core utilization: [49.7, 49.8, 81.6, 42.5] +2025-08-19 01:27:04 - INFO - [5cfbadaa-3fc7-4e86-ae53-7fa554a394ee] Cleaned up temporary file: temp_videos/5cfbadaa-3fc7-4e86-ae53-7fa554a394ee.mp4 +2025-08-19 01:27:04 - INFO - [5cfbadaa-3fc7-4e86-ae53-7fa554a394ee] Cleaned up temporary frame directory: temp_videos/5cfbadaa-3fc7-4e86-ae53-7fa554a394ee +2025-08-19 01:27:04 - INFO - [76ba2d4b-5cbc-4ffc-8fc1-5a592e782300] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_067.mp4' +2025-08-19 01:27:04 - INFO - [76ba2d4b-5cbc-4ffc-8fc1-5a592e782300] Video saved to temporary file: temp_videos/76ba2d4b-5cbc-4ffc-8fc1-5a592e782300.mp4 +2025-08-19 01:27:04 - INFO - [76ba2d4b-5cbc-4ffc-8fc1-5a592e782300] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:27:09 - INFO - [76ba2d4b-5cbc-4ffc-8fc1-5a592e782300] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:27:09 - INFO - [76ba2d4b-5cbc-4ffc-8fc1-5a592e782300] 30 frames saved to temp_videos/76ba2d4b-5cbc-4ffc-8fc1-5a592e782300 +2025-08-19 01:27:09 - INFO - Prompt token length: 2276 +2025-08-19 01:27:17 - INFO - Tokens per second: 14.830904988362118, Peak GPU memory MB: 4498.375 +2025-08-19 01:27:17 - INFO - [76ba2d4b-5cbc-4ffc-8fc1-5a592e782300] Inference time: 12.95 seconds, CPU usage: 52.7%, CPU core utilization: [40.7, 36.3, 37.9, 95.8] +2025-08-19 01:27:17 - INFO - [76ba2d4b-5cbc-4ffc-8fc1-5a592e782300] Cleaned up temporary file: temp_videos/76ba2d4b-5cbc-4ffc-8fc1-5a592e782300.mp4 +2025-08-19 01:27:17 - INFO - [76ba2d4b-5cbc-4ffc-8fc1-5a592e782300] Cleaned up temporary frame directory: temp_videos/76ba2d4b-5cbc-4ffc-8fc1-5a592e782300 +2025-08-19 01:27:17 - INFO - [69af6589-da27-4f13-a6df-9434d9898e58] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_068.mp4' +2025-08-19 01:27:17 - INFO - [69af6589-da27-4f13-a6df-9434d9898e58] Video saved to temporary file: temp_videos/69af6589-da27-4f13-a6df-9434d9898e58.mp4 +2025-08-19 01:27:17 - INFO - [69af6589-da27-4f13-a6df-9434d9898e58] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:27:22 - INFO - [69af6589-da27-4f13-a6df-9434d9898e58] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:27:22 - INFO - [69af6589-da27-4f13-a6df-9434d9898e58] 30 frames saved to temp_videos/69af6589-da27-4f13-a6df-9434d9898e58 +2025-08-19 01:27:22 - INFO - Prompt token length: 2276 +2025-08-19 01:27:34 - INFO - Tokens per second: 15.1336533103342, Peak GPU memory MB: 4498.375 +2025-08-19 01:27:34 - INFO - [69af6589-da27-4f13-a6df-9434d9898e58] Inference time: 16.86 seconds, CPU usage: 48.5%, CPU core utilization: [38.2, 52.1, 34.5, 69.4] +2025-08-19 01:27:34 - INFO - [69af6589-da27-4f13-a6df-9434d9898e58] Cleaned up temporary file: temp_videos/69af6589-da27-4f13-a6df-9434d9898e58.mp4 +2025-08-19 01:27:34 - INFO - [69af6589-da27-4f13-a6df-9434d9898e58] Cleaned up temporary frame directory: temp_videos/69af6589-da27-4f13-a6df-9434d9898e58 +2025-08-19 01:27:34 - INFO - [9b7f7150-673e-4bf0-9316-1d84aec58789] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_069.mp4' +2025-08-19 01:27:34 - INFO - [9b7f7150-673e-4bf0-9316-1d84aec58789] Video saved to temporary file: temp_videos/9b7f7150-673e-4bf0-9316-1d84aec58789.mp4 +2025-08-19 01:27:34 - INFO - [9b7f7150-673e-4bf0-9316-1d84aec58789] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:27:40 - INFO - [9b7f7150-673e-4bf0-9316-1d84aec58789] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:27:40 - INFO - [9b7f7150-673e-4bf0-9316-1d84aec58789] 30 frames saved to temp_videos/9b7f7150-673e-4bf0-9316-1d84aec58789 +2025-08-19 01:27:40 - INFO - Prompt token length: 2276 +2025-08-19 01:27:59 - INFO - Tokens per second: 15.159161023592182, Peak GPU memory MB: 4498.375 +2025-08-19 01:27:59 - INFO - [9b7f7150-673e-4bf0-9316-1d84aec58789] Inference time: 24.82 seconds, CPU usage: 45.5%, CPU core utilization: [65.6, 35.5, 27.1, 53.8] +2025-08-19 01:27:59 - INFO - [9b7f7150-673e-4bf0-9316-1d84aec58789] Cleaned up temporary file: temp_videos/9b7f7150-673e-4bf0-9316-1d84aec58789.mp4 +2025-08-19 01:27:59 - INFO - [9b7f7150-673e-4bf0-9316-1d84aec58789] Cleaned up temporary frame directory: temp_videos/9b7f7150-673e-4bf0-9316-1d84aec58789 +2025-08-19 01:27:59 - INFO - [cfdeca3e-8079-40a5-9281-73b31662e9a3] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_070.mp4' +2025-08-19 01:27:59 - INFO - [cfdeca3e-8079-40a5-9281-73b31662e9a3] Video saved to temporary file: temp_videos/cfdeca3e-8079-40a5-9281-73b31662e9a3.mp4 +2025-08-19 01:27:59 - INFO - [cfdeca3e-8079-40a5-9281-73b31662e9a3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:28:05 - INFO - [cfdeca3e-8079-40a5-9281-73b31662e9a3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:28:05 - INFO - [cfdeca3e-8079-40a5-9281-73b31662e9a3] 30 frames saved to temp_videos/cfdeca3e-8079-40a5-9281-73b31662e9a3 +2025-08-19 01:28:05 - INFO - Prompt token length: 2276 +2025-08-19 01:28:16 - INFO - Tokens per second: 15.19258322661976, Peak GPU memory MB: 4498.375 +2025-08-19 01:28:16 - INFO - [cfdeca3e-8079-40a5-9281-73b31662e9a3] Inference time: 16.96 seconds, CPU usage: 51.3%, CPU core utilization: [77.7, 39.2, 52.5, 35.6] +2025-08-19 01:28:16 - INFO - [cfdeca3e-8079-40a5-9281-73b31662e9a3] Cleaned up temporary file: temp_videos/cfdeca3e-8079-40a5-9281-73b31662e9a3.mp4 +2025-08-19 01:28:16 - INFO - [cfdeca3e-8079-40a5-9281-73b31662e9a3] Cleaned up temporary frame directory: temp_videos/cfdeca3e-8079-40a5-9281-73b31662e9a3 +2025-08-19 01:28:16 - INFO - [29e4791d-ce0c-47ee-ad26-3d5560cfb4b8] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_071.mp4' +2025-08-19 01:28:16 - INFO - [29e4791d-ce0c-47ee-ad26-3d5560cfb4b8] Video saved to temporary file: temp_videos/29e4791d-ce0c-47ee-ad26-3d5560cfb4b8.mp4 +2025-08-19 01:28:16 - INFO - [29e4791d-ce0c-47ee-ad26-3d5560cfb4b8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:28:20 - INFO - [29e4791d-ce0c-47ee-ad26-3d5560cfb4b8] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:28:20 - INFO - [29e4791d-ce0c-47ee-ad26-3d5560cfb4b8] 30 frames saved to temp_videos/29e4791d-ce0c-47ee-ad26-3d5560cfb4b8 +2025-08-19 01:28:20 - INFO - Prompt token length: 2276 +2025-08-19 01:28:27 - INFO - Tokens per second: 15.094968885806995, Peak GPU memory MB: 4498.375 +2025-08-19 01:28:27 - INFO - [29e4791d-ce0c-47ee-ad26-3d5560cfb4b8] Inference time: 11.34 seconds, CPU usage: 56.8%, CPU core utilization: [90.7, 41.6, 48.1, 47.0] +2025-08-19 01:28:27 - INFO - [29e4791d-ce0c-47ee-ad26-3d5560cfb4b8] Cleaned up temporary file: temp_videos/29e4791d-ce0c-47ee-ad26-3d5560cfb4b8.mp4 +2025-08-19 01:28:27 - INFO - [29e4791d-ce0c-47ee-ad26-3d5560cfb4b8] Cleaned up temporary frame directory: temp_videos/29e4791d-ce0c-47ee-ad26-3d5560cfb4b8 +2025-08-19 01:28:27 - INFO - [da02b2c2-35be-42d6-b069-def102177448] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_072.mp4' +2025-08-19 01:28:27 - INFO - [da02b2c2-35be-42d6-b069-def102177448] Video saved to temporary file: temp_videos/da02b2c2-35be-42d6-b069-def102177448.mp4 +2025-08-19 01:28:27 - INFO - [da02b2c2-35be-42d6-b069-def102177448] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:28:32 - INFO - [da02b2c2-35be-42d6-b069-def102177448] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:28:32 - INFO - [da02b2c2-35be-42d6-b069-def102177448] 30 frames saved to temp_videos/da02b2c2-35be-42d6-b069-def102177448 +2025-08-19 01:28:32 - INFO - Prompt token length: 2276 +2025-08-19 01:28:39 - INFO - Tokens per second: 15.089671871346129, Peak GPU memory MB: 4498.375 +2025-08-19 01:28:39 - INFO - [da02b2c2-35be-42d6-b069-def102177448] Inference time: 12.46 seconds, CPU usage: 53.7%, CPU core utilization: [41.5, 38.1, 39.3, 95.6] +2025-08-19 01:28:39 - INFO - [da02b2c2-35be-42d6-b069-def102177448] Cleaned up temporary file: temp_videos/da02b2c2-35be-42d6-b069-def102177448.mp4 +2025-08-19 01:28:39 - INFO - [da02b2c2-35be-42d6-b069-def102177448] Cleaned up temporary frame directory: temp_videos/da02b2c2-35be-42d6-b069-def102177448 +2025-08-19 01:28:39 - INFO - [e0e0e6d1-9123-4629-8d9a-c0e1a3c75bf1] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_073.mp4' +2025-08-19 01:28:39 - INFO - [e0e0e6d1-9123-4629-8d9a-c0e1a3c75bf1] Video saved to temporary file: temp_videos/e0e0e6d1-9123-4629-8d9a-c0e1a3c75bf1.mp4 +2025-08-19 01:28:39 - INFO - [e0e0e6d1-9123-4629-8d9a-c0e1a3c75bf1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:28:44 - INFO - [e0e0e6d1-9123-4629-8d9a-c0e1a3c75bf1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:28:44 - INFO - [e0e0e6d1-9123-4629-8d9a-c0e1a3c75bf1] 30 frames saved to temp_videos/e0e0e6d1-9123-4629-8d9a-c0e1a3c75bf1 +2025-08-19 01:28:44 - INFO - Prompt token length: 2276 +2025-08-19 01:28:56 - INFO - Tokens per second: 15.166777613396597, Peak GPU memory MB: 4498.375 +2025-08-19 01:28:56 - INFO - [e0e0e6d1-9123-4629-8d9a-c0e1a3c75bf1] Inference time: 16.90 seconds, CPU usage: 48.3%, CPU core utilization: [68.4, 35.7, 57.5, 31.6] +2025-08-19 01:28:56 - INFO - [e0e0e6d1-9123-4629-8d9a-c0e1a3c75bf1] Cleaned up temporary file: temp_videos/e0e0e6d1-9123-4629-8d9a-c0e1a3c75bf1.mp4 +2025-08-19 01:28:56 - INFO - [e0e0e6d1-9123-4629-8d9a-c0e1a3c75bf1] Cleaned up temporary frame directory: temp_videos/e0e0e6d1-9123-4629-8d9a-c0e1a3c75bf1 +2025-08-19 01:28:56 - INFO - [d6178734-2347-48b0-ac6c-de3d6b08844f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_074.mp4' +2025-08-19 01:28:56 - INFO - [d6178734-2347-48b0-ac6c-de3d6b08844f] Video saved to temporary file: temp_videos/d6178734-2347-48b0-ac6c-de3d6b08844f.mp4 +2025-08-19 01:28:56 - INFO - [d6178734-2347-48b0-ac6c-de3d6b08844f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:29:01 - INFO - [d6178734-2347-48b0-ac6c-de3d6b08844f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:29:01 - INFO - [d6178734-2347-48b0-ac6c-de3d6b08844f] 30 frames saved to temp_videos/d6178734-2347-48b0-ac6c-de3d6b08844f +2025-08-19 01:29:02 - INFO - Prompt token length: 2276 +2025-08-19 01:29:09 - INFO - Tokens per second: 15.050789376653178, Peak GPU memory MB: 4498.375 +2025-08-19 01:29:09 - INFO - [d6178734-2347-48b0-ac6c-de3d6b08844f] Inference time: 12.29 seconds, CPU usage: 56.3%, CPU core utilization: [44.5, 50.7, 42.0, 87.8] +2025-08-19 01:29:09 - INFO - [d6178734-2347-48b0-ac6c-de3d6b08844f] Cleaned up temporary file: temp_videos/d6178734-2347-48b0-ac6c-de3d6b08844f.mp4 +2025-08-19 01:29:09 - INFO - [d6178734-2347-48b0-ac6c-de3d6b08844f] Cleaned up temporary frame directory: temp_videos/d6178734-2347-48b0-ac6c-de3d6b08844f +2025-08-19 01:29:09 - INFO - [9f020d89-b20e-4f18-a363-6f2491dde56a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_075.mp4' +2025-08-19 01:29:09 - INFO - [9f020d89-b20e-4f18-a363-6f2491dde56a] Video saved to temporary file: temp_videos/9f020d89-b20e-4f18-a363-6f2491dde56a.mp4 +2025-08-19 01:29:09 - INFO - [9f020d89-b20e-4f18-a363-6f2491dde56a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:29:13 - INFO - [9f020d89-b20e-4f18-a363-6f2491dde56a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:29:13 - INFO - [9f020d89-b20e-4f18-a363-6f2491dde56a] 30 frames saved to temp_videos/9f020d89-b20e-4f18-a363-6f2491dde56a +2025-08-19 01:29:14 - INFO - Prompt token length: 2276 +2025-08-19 01:29:21 - INFO - Tokens per second: 15.095610978597701, Peak GPU memory MB: 4498.375 +2025-08-19 01:29:21 - INFO - [9f020d89-b20e-4f18-a363-6f2491dde56a] Inference time: 11.83 seconds, CPU usage: 54.9%, CPU core utilization: [47.2, 56.5, 45.0, 70.8] +2025-08-19 01:29:21 - INFO - [9f020d89-b20e-4f18-a363-6f2491dde56a] Cleaned up temporary file: temp_videos/9f020d89-b20e-4f18-a363-6f2491dde56a.mp4 +2025-08-19 01:29:21 - INFO - [9f020d89-b20e-4f18-a363-6f2491dde56a] Cleaned up temporary frame directory: temp_videos/9f020d89-b20e-4f18-a363-6f2491dde56a +2025-08-19 01:29:21 - INFO - [27563b6e-2f25-493c-a6d2-a6eebbf7eae2] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_076.mp4' +2025-08-19 01:29:21 - INFO - [27563b6e-2f25-493c-a6d2-a6eebbf7eae2] Video saved to temporary file: temp_videos/27563b6e-2f25-493c-a6d2-a6eebbf7eae2.mp4 +2025-08-19 01:29:21 - INFO - [27563b6e-2f25-493c-a6d2-a6eebbf7eae2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:29:25 - INFO - [27563b6e-2f25-493c-a6d2-a6eebbf7eae2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:29:25 - INFO - [27563b6e-2f25-493c-a6d2-a6eebbf7eae2] 30 frames saved to temp_videos/27563b6e-2f25-493c-a6d2-a6eebbf7eae2 +2025-08-19 01:29:25 - INFO - Prompt token length: 2276 +2025-08-19 01:29:33 - INFO - Tokens per second: 15.096457780335871, Peak GPU memory MB: 4498.375 +2025-08-19 01:29:33 - INFO - [27563b6e-2f25-493c-a6d2-a6eebbf7eae2] Inference time: 12.64 seconds, CPU usage: 53.5%, CPU core utilization: [41.6, 38.1, 39.4, 94.7] +2025-08-19 01:29:33 - INFO - [27563b6e-2f25-493c-a6d2-a6eebbf7eae2] Cleaned up temporary file: temp_videos/27563b6e-2f25-493c-a6d2-a6eebbf7eae2.mp4 +2025-08-19 01:29:33 - INFO - [27563b6e-2f25-493c-a6d2-a6eebbf7eae2] Cleaned up temporary frame directory: temp_videos/27563b6e-2f25-493c-a6d2-a6eebbf7eae2 +2025-08-19 01:29:33 - INFO - [5bd6db33-5c87-43e1-be7d-16cdbc2bb592] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_077.mp4' +2025-08-19 01:29:33 - INFO - [5bd6db33-5c87-43e1-be7d-16cdbc2bb592] Video saved to temporary file: temp_videos/5bd6db33-5c87-43e1-be7d-16cdbc2bb592.mp4 +2025-08-19 01:29:33 - INFO - [5bd6db33-5c87-43e1-be7d-16cdbc2bb592] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:29:39 - INFO - [5bd6db33-5c87-43e1-be7d-16cdbc2bb592] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:29:39 - INFO - [5bd6db33-5c87-43e1-be7d-16cdbc2bb592] 30 frames saved to temp_videos/5bd6db33-5c87-43e1-be7d-16cdbc2bb592 +2025-08-19 01:29:39 - INFO - Prompt token length: 2276 +2025-08-19 01:29:46 - INFO - Tokens per second: 15.115430955512632, Peak GPU memory MB: 4498.375 +2025-08-19 01:29:46 - INFO - [5bd6db33-5c87-43e1-be7d-16cdbc2bb592] Inference time: 13.08 seconds, CPU usage: 55.8%, CPU core utilization: [45.9, 46.4, 91.0, 39.9] +2025-08-19 01:29:46 - INFO - [5bd6db33-5c87-43e1-be7d-16cdbc2bb592] Cleaned up temporary file: temp_videos/5bd6db33-5c87-43e1-be7d-16cdbc2bb592.mp4 +2025-08-19 01:29:46 - INFO - [5bd6db33-5c87-43e1-be7d-16cdbc2bb592] Cleaned up temporary frame directory: temp_videos/5bd6db33-5c87-43e1-be7d-16cdbc2bb592 +2025-08-19 01:29:46 - INFO - [2974d46e-64cc-4a67-8b24-1d6dd17263ea] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_078.mp4' +2025-08-19 01:29:46 - INFO - [2974d46e-64cc-4a67-8b24-1d6dd17263ea] Video saved to temporary file: temp_videos/2974d46e-64cc-4a67-8b24-1d6dd17263ea.mp4 +2025-08-19 01:29:46 - INFO - [2974d46e-64cc-4a67-8b24-1d6dd17263ea] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:29:51 - INFO - [2974d46e-64cc-4a67-8b24-1d6dd17263ea] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:29:51 - INFO - [2974d46e-64cc-4a67-8b24-1d6dd17263ea] 30 frames saved to temp_videos/2974d46e-64cc-4a67-8b24-1d6dd17263ea +2025-08-19 01:29:52 - INFO - Prompt token length: 2276 +2025-08-19 01:29:59 - INFO - Tokens per second: 15.155702459448817, Peak GPU memory MB: 4498.375 +2025-08-19 01:29:59 - INFO - [2974d46e-64cc-4a67-8b24-1d6dd17263ea] Inference time: 12.56 seconds, CPU usage: 55.1%, CPU core utilization: [74.5, 39.8, 61.1, 45.1] +2025-08-19 01:29:59 - INFO - [2974d46e-64cc-4a67-8b24-1d6dd17263ea] Cleaned up temporary file: temp_videos/2974d46e-64cc-4a67-8b24-1d6dd17263ea.mp4 +2025-08-19 01:29:59 - INFO - [2974d46e-64cc-4a67-8b24-1d6dd17263ea] Cleaned up temporary frame directory: temp_videos/2974d46e-64cc-4a67-8b24-1d6dd17263ea +2025-08-19 01:29:59 - INFO - [abfb1afd-bc3a-422e-bc73-4c9c0d30dd44] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_079.mp4' +2025-08-19 01:29:59 - INFO - [abfb1afd-bc3a-422e-bc73-4c9c0d30dd44] Video saved to temporary file: temp_videos/abfb1afd-bc3a-422e-bc73-4c9c0d30dd44.mp4 +2025-08-19 01:29:59 - INFO - [abfb1afd-bc3a-422e-bc73-4c9c0d30dd44] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:30:05 - INFO - [abfb1afd-bc3a-422e-bc73-4c9c0d30dd44] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:30:05 - INFO - [abfb1afd-bc3a-422e-bc73-4c9c0d30dd44] 30 frames saved to temp_videos/abfb1afd-bc3a-422e-bc73-4c9c0d30dd44 +2025-08-19 01:30:05 - INFO - Prompt token length: 2276 +2025-08-19 01:30:12 - INFO - Tokens per second: 15.112858391134807, Peak GPU memory MB: 4498.375 +2025-08-19 01:30:12 - INFO - [abfb1afd-bc3a-422e-bc73-4c9c0d30dd44] Inference time: 13.40 seconds, CPU usage: 58.8%, CPU core utilization: [49.9, 45.3, 90.4, 49.6] +2025-08-19 01:30:12 - INFO - [abfb1afd-bc3a-422e-bc73-4c9c0d30dd44] Cleaned up temporary file: temp_videos/abfb1afd-bc3a-422e-bc73-4c9c0d30dd44.mp4 +2025-08-19 01:30:12 - INFO - [abfb1afd-bc3a-422e-bc73-4c9c0d30dd44] Cleaned up temporary frame directory: temp_videos/abfb1afd-bc3a-422e-bc73-4c9c0d30dd44 +2025-08-19 01:30:12 - INFO - [aa6591ec-00bd-4dbe-91c2-6c14ccb38567] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_080.mp4' +2025-08-19 01:30:12 - INFO - [aa6591ec-00bd-4dbe-91c2-6c14ccb38567] Video saved to temporary file: temp_videos/aa6591ec-00bd-4dbe-91c2-6c14ccb38567.mp4 +2025-08-19 01:30:12 - INFO - [aa6591ec-00bd-4dbe-91c2-6c14ccb38567] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:30:18 - INFO - [aa6591ec-00bd-4dbe-91c2-6c14ccb38567] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:30:18 - INFO - [aa6591ec-00bd-4dbe-91c2-6c14ccb38567] 30 frames saved to temp_videos/aa6591ec-00bd-4dbe-91c2-6c14ccb38567 +2025-08-19 01:30:18 - INFO - Prompt token length: 2276 +2025-08-19 01:30:29 - INFO - Tokens per second: 15.170065726896636, Peak GPU memory MB: 4498.375 +2025-08-19 01:30:29 - INFO - [aa6591ec-00bd-4dbe-91c2-6c14ccb38567] Inference time: 16.35 seconds, CPU usage: 51.3%, CPU core utilization: [80.9, 52.4, 34.9, 36.9] +2025-08-19 01:30:29 - INFO - [aa6591ec-00bd-4dbe-91c2-6c14ccb38567] Cleaned up temporary file: temp_videos/aa6591ec-00bd-4dbe-91c2-6c14ccb38567.mp4 +2025-08-19 01:30:29 - INFO - [aa6591ec-00bd-4dbe-91c2-6c14ccb38567] Cleaned up temporary frame directory: temp_videos/aa6591ec-00bd-4dbe-91c2-6c14ccb38567 +2025-08-19 01:30:29 - INFO - [263e2d9e-d186-451f-a827-6bbf2fd0815b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_081.mp4' +2025-08-19 01:30:29 - INFO - [263e2d9e-d186-451f-a827-6bbf2fd0815b] Video saved to temporary file: temp_videos/263e2d9e-d186-451f-a827-6bbf2fd0815b.mp4 +2025-08-19 01:30:29 - INFO - [263e2d9e-d186-451f-a827-6bbf2fd0815b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:30:34 - INFO - [263e2d9e-d186-451f-a827-6bbf2fd0815b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:30:34 - INFO - [263e2d9e-d186-451f-a827-6bbf2fd0815b] 30 frames saved to temp_videos/263e2d9e-d186-451f-a827-6bbf2fd0815b +2025-08-19 01:30:34 - INFO - Prompt token length: 2276 +2025-08-19 01:30:53 - INFO - Tokens per second: 15.067583259819104, Peak GPU memory MB: 4498.375 +2025-08-19 01:30:53 - INFO - [263e2d9e-d186-451f-a827-6bbf2fd0815b] Inference time: 23.76 seconds, CPU usage: 43.1%, CPU core utilization: [22.7, 28.1, 39.8, 81.9] +2025-08-19 01:30:53 - INFO - [263e2d9e-d186-451f-a827-6bbf2fd0815b] Cleaned up temporary file: temp_videos/263e2d9e-d186-451f-a827-6bbf2fd0815b.mp4 +2025-08-19 01:30:53 - INFO - [263e2d9e-d186-451f-a827-6bbf2fd0815b] Cleaned up temporary frame directory: temp_videos/263e2d9e-d186-451f-a827-6bbf2fd0815b +2025-08-19 01:30:53 - INFO - [9ed54c7b-2992-4884-9d06-90296c51f4ec] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_082.mp4' +2025-08-19 01:30:53 - INFO - [9ed54c7b-2992-4884-9d06-90296c51f4ec] Video saved to temporary file: temp_videos/9ed54c7b-2992-4884-9d06-90296c51f4ec.mp4 +2025-08-19 01:30:53 - INFO - [9ed54c7b-2992-4884-9d06-90296c51f4ec] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:30:57 - INFO - [9ed54c7b-2992-4884-9d06-90296c51f4ec] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:30:57 - INFO - [9ed54c7b-2992-4884-9d06-90296c51f4ec] 30 frames saved to temp_videos/9ed54c7b-2992-4884-9d06-90296c51f4ec +2025-08-19 01:30:58 - INFO - Prompt token length: 2276 +2025-08-19 01:31:04 - INFO - Tokens per second: 15.231033552960797, Peak GPU memory MB: 4498.375 +2025-08-19 01:31:04 - INFO - [9ed54c7b-2992-4884-9d06-90296c51f4ec] Inference time: 11.35 seconds, CPU usage: 56.9%, CPU core utilization: [49.3, 42.9, 88.6, 46.7] +2025-08-19 01:31:04 - INFO - [9ed54c7b-2992-4884-9d06-90296c51f4ec] Cleaned up temporary file: temp_videos/9ed54c7b-2992-4884-9d06-90296c51f4ec.mp4 +2025-08-19 01:31:04 - INFO - [9ed54c7b-2992-4884-9d06-90296c51f4ec] Cleaned up temporary frame directory: temp_videos/9ed54c7b-2992-4884-9d06-90296c51f4ec +2025-08-19 01:31:04 - INFO - [ea923044-7ba0-4945-b204-c85836d6e64b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_083.mp4' +2025-08-19 01:31:04 - INFO - [ea923044-7ba0-4945-b204-c85836d6e64b] Video saved to temporary file: temp_videos/ea923044-7ba0-4945-b204-c85836d6e64b.mp4 +2025-08-19 01:31:04 - INFO - [ea923044-7ba0-4945-b204-c85836d6e64b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:31:09 - INFO - [ea923044-7ba0-4945-b204-c85836d6e64b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:31:09 - INFO - [ea923044-7ba0-4945-b204-c85836d6e64b] 30 frames saved to temp_videos/ea923044-7ba0-4945-b204-c85836d6e64b +2025-08-19 01:31:09 - INFO - Prompt token length: 2276 +2025-08-19 01:31:28 - INFO - Tokens per second: 15.208403584063404, Peak GPU memory MB: 4498.375 +2025-08-19 01:31:28 - INFO - [ea923044-7ba0-4945-b204-c85836d6e64b] Inference time: 23.63 seconds, CPU usage: 43.2%, CPU core utilization: [62.4, 38.7, 45.7, 25.9] +2025-08-19 01:31:28 - INFO - [ea923044-7ba0-4945-b204-c85836d6e64b] Cleaned up temporary file: temp_videos/ea923044-7ba0-4945-b204-c85836d6e64b.mp4 +2025-08-19 01:31:28 - INFO - [ea923044-7ba0-4945-b204-c85836d6e64b] Cleaned up temporary frame directory: temp_videos/ea923044-7ba0-4945-b204-c85836d6e64b +2025-08-19 01:31:28 - INFO - [e2f206b5-b292-45c7-b9fb-fa514828945a] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_084.mp4' +2025-08-19 01:31:28 - INFO - [e2f206b5-b292-45c7-b9fb-fa514828945a] Video saved to temporary file: temp_videos/e2f206b5-b292-45c7-b9fb-fa514828945a.mp4 +2025-08-19 01:31:28 - INFO - [e2f206b5-b292-45c7-b9fb-fa514828945a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:31:33 - INFO - [e2f206b5-b292-45c7-b9fb-fa514828945a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:31:33 - INFO - [e2f206b5-b292-45c7-b9fb-fa514828945a] 30 frames saved to temp_videos/e2f206b5-b292-45c7-b9fb-fa514828945a +2025-08-19 01:31:33 - INFO - Prompt token length: 2276 +2025-08-19 01:31:40 - INFO - Tokens per second: 15.128428649096634, Peak GPU memory MB: 4498.375 +2025-08-19 01:31:40 - INFO - [e2f206b5-b292-45c7-b9fb-fa514828945a] Inference time: 11.76 seconds, CPU usage: 57.5%, CPU core utilization: [45.1, 71.3, 47.2, 66.3] +2025-08-19 01:31:40 - INFO - [e2f206b5-b292-45c7-b9fb-fa514828945a] Cleaned up temporary file: temp_videos/e2f206b5-b292-45c7-b9fb-fa514828945a.mp4 +2025-08-19 01:31:40 - INFO - [e2f206b5-b292-45c7-b9fb-fa514828945a] Cleaned up temporary frame directory: temp_videos/e2f206b5-b292-45c7-b9fb-fa514828945a +2025-08-19 01:31:40 - INFO - [97725e3a-1110-4554-bec6-a5800009551d] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_085.mp4' +2025-08-19 01:31:40 - INFO - [97725e3a-1110-4554-bec6-a5800009551d] Video saved to temporary file: temp_videos/97725e3a-1110-4554-bec6-a5800009551d.mp4 +2025-08-19 01:31:40 - INFO - [97725e3a-1110-4554-bec6-a5800009551d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:31:45 - INFO - [97725e3a-1110-4554-bec6-a5800009551d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:31:45 - INFO - [97725e3a-1110-4554-bec6-a5800009551d] 30 frames saved to temp_videos/97725e3a-1110-4554-bec6-a5800009551d +2025-08-19 01:31:45 - INFO - Prompt token length: 2276 +2025-08-19 01:31:53 - INFO - Tokens per second: 15.09179881903882, Peak GPU memory MB: 4498.375 +2025-08-19 01:31:53 - INFO - [97725e3a-1110-4554-bec6-a5800009551d] Inference time: 12.98 seconds, CPU usage: 57.2%, CPU core utilization: [95.5, 44.2, 40.6, 48.6] +2025-08-19 01:31:53 - INFO - [97725e3a-1110-4554-bec6-a5800009551d] Cleaned up temporary file: temp_videos/97725e3a-1110-4554-bec6-a5800009551d.mp4 +2025-08-19 01:31:53 - INFO - [97725e3a-1110-4554-bec6-a5800009551d] Cleaned up temporary frame directory: temp_videos/97725e3a-1110-4554-bec6-a5800009551d +2025-08-19 01:31:53 - INFO - [9855dcfa-b241-4d85-a706-eb64ffaef9fe] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_086.mp4' +2025-08-19 01:31:53 - INFO - [9855dcfa-b241-4d85-a706-eb64ffaef9fe] Video saved to temporary file: temp_videos/9855dcfa-b241-4d85-a706-eb64ffaef9fe.mp4 +2025-08-19 01:31:53 - INFO - [9855dcfa-b241-4d85-a706-eb64ffaef9fe] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:31:58 - INFO - [9855dcfa-b241-4d85-a706-eb64ffaef9fe] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:31:58 - INFO - [9855dcfa-b241-4d85-a706-eb64ffaef9fe] 30 frames saved to temp_videos/9855dcfa-b241-4d85-a706-eb64ffaef9fe +2025-08-19 01:31:58 - INFO - Prompt token length: 2276 +2025-08-19 01:32:17 - INFO - Tokens per second: 15.243661062498152, Peak GPU memory MB: 4498.375 +2025-08-19 01:32:17 - INFO - [9855dcfa-b241-4d85-a706-eb64ffaef9fe] Inference time: 24.26 seconds, CPU usage: 43.6%, CPU core utilization: [32.2, 26.5, 86.3, 29.5] +2025-08-19 01:32:17 - INFO - [9855dcfa-b241-4d85-a706-eb64ffaef9fe] Cleaned up temporary file: temp_videos/9855dcfa-b241-4d85-a706-eb64ffaef9fe.mp4 +2025-08-19 01:32:17 - INFO - [9855dcfa-b241-4d85-a706-eb64ffaef9fe] Cleaned up temporary frame directory: temp_videos/9855dcfa-b241-4d85-a706-eb64ffaef9fe +2025-08-19 01:32:17 - INFO - [0d5ab643-1cc0-43da-bfcf-c9bd9b051541] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_087.mp4' +2025-08-19 01:32:17 - INFO - [0d5ab643-1cc0-43da-bfcf-c9bd9b051541] Video saved to temporary file: temp_videos/0d5ab643-1cc0-43da-bfcf-c9bd9b051541.mp4 +2025-08-19 01:32:17 - INFO - [0d5ab643-1cc0-43da-bfcf-c9bd9b051541] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:32:23 - INFO - [0d5ab643-1cc0-43da-bfcf-c9bd9b051541] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:32:23 - INFO - [0d5ab643-1cc0-43da-bfcf-c9bd9b051541] 30 frames saved to temp_videos/0d5ab643-1cc0-43da-bfcf-c9bd9b051541 +2025-08-19 01:32:23 - INFO - Prompt token length: 2276 +2025-08-19 01:32:30 - INFO - Tokens per second: 15.018603405888955, Peak GPU memory MB: 4498.375 +2025-08-19 01:32:30 - INFO - [0d5ab643-1cc0-43da-bfcf-c9bd9b051541] Inference time: 13.33 seconds, CPU usage: 58.0%, CPU core utilization: [47.7, 69.1, 47.8, 67.4] +2025-08-19 01:32:30 - INFO - [0d5ab643-1cc0-43da-bfcf-c9bd9b051541] Cleaned up temporary file: temp_videos/0d5ab643-1cc0-43da-bfcf-c9bd9b051541.mp4 +2025-08-19 01:32:30 - INFO - [0d5ab643-1cc0-43da-bfcf-c9bd9b051541] Cleaned up temporary frame directory: temp_videos/0d5ab643-1cc0-43da-bfcf-c9bd9b051541 +2025-08-19 01:32:30 - INFO - [59b89d7f-21c0-4704-89a2-ba749481b783] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_088.mp4' +2025-08-19 01:32:30 - INFO - [59b89d7f-21c0-4704-89a2-ba749481b783] Video saved to temporary file: temp_videos/59b89d7f-21c0-4704-89a2-ba749481b783.mp4 +2025-08-19 01:32:30 - INFO - [59b89d7f-21c0-4704-89a2-ba749481b783] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:32:36 - INFO - [59b89d7f-21c0-4704-89a2-ba749481b783] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:32:36 - INFO - [59b89d7f-21c0-4704-89a2-ba749481b783] 30 frames saved to temp_videos/59b89d7f-21c0-4704-89a2-ba749481b783 +2025-08-19 01:32:36 - INFO - Prompt token length: 2276 +2025-08-19 01:32:42 - INFO - Tokens per second: 15.050961440857408, Peak GPU memory MB: 4498.375 +2025-08-19 01:32:42 - INFO - [59b89d7f-21c0-4704-89a2-ba749481b783] Inference time: 12.16 seconds, CPU usage: 58.7%, CPU core utilization: [47.3, 73.4, 60.5, 53.5] +2025-08-19 01:32:42 - INFO - [59b89d7f-21c0-4704-89a2-ba749481b783] Cleaned up temporary file: temp_videos/59b89d7f-21c0-4704-89a2-ba749481b783.mp4 +2025-08-19 01:32:42 - INFO - [59b89d7f-21c0-4704-89a2-ba749481b783] Cleaned up temporary frame directory: temp_videos/59b89d7f-21c0-4704-89a2-ba749481b783 +2025-08-19 01:32:42 - INFO - [d31a02e7-3cda-40f5-97c6-159efe0e99b0] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_089.mp4' +2025-08-19 01:32:42 - INFO - [d31a02e7-3cda-40f5-97c6-159efe0e99b0] Video saved to temporary file: temp_videos/d31a02e7-3cda-40f5-97c6-159efe0e99b0.mp4 +2025-08-19 01:32:42 - INFO - [d31a02e7-3cda-40f5-97c6-159efe0e99b0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:32:47 - INFO - [d31a02e7-3cda-40f5-97c6-159efe0e99b0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:32:47 - INFO - [d31a02e7-3cda-40f5-97c6-159efe0e99b0] 30 frames saved to temp_videos/d31a02e7-3cda-40f5-97c6-159efe0e99b0 +2025-08-19 01:32:47 - INFO - Prompt token length: 2276 +2025-08-19 01:32:54 - INFO - Tokens per second: 15.072998222410463, Peak GPU memory MB: 4498.375 +2025-08-19 01:32:54 - INFO - [d31a02e7-3cda-40f5-97c6-159efe0e99b0] Inference time: 11.24 seconds, CPU usage: 56.2%, CPU core utilization: [62.1, 46.8, 74.6, 41.3] +2025-08-19 01:32:54 - INFO - [d31a02e7-3cda-40f5-97c6-159efe0e99b0] Cleaned up temporary file: temp_videos/d31a02e7-3cda-40f5-97c6-159efe0e99b0.mp4 +2025-08-19 01:32:54 - INFO - [d31a02e7-3cda-40f5-97c6-159efe0e99b0] Cleaned up temporary frame directory: temp_videos/d31a02e7-3cda-40f5-97c6-159efe0e99b0 +2025-08-19 01:32:54 - INFO - [925d037f-7ad8-4811-9a51-bb340e45cf0c] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_090.mp4' +2025-08-19 01:32:54 - INFO - [925d037f-7ad8-4811-9a51-bb340e45cf0c] Video saved to temporary file: temp_videos/925d037f-7ad8-4811-9a51-bb340e45cf0c.mp4 +2025-08-19 01:32:54 - INFO - [925d037f-7ad8-4811-9a51-bb340e45cf0c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:32:59 - INFO - [925d037f-7ad8-4811-9a51-bb340e45cf0c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:32:59 - INFO - [925d037f-7ad8-4811-9a51-bb340e45cf0c] 30 frames saved to temp_videos/925d037f-7ad8-4811-9a51-bb340e45cf0c +2025-08-19 01:32:59 - INFO - Prompt token length: 2276 +2025-08-19 01:33:07 - INFO - Tokens per second: 15.126399424127708, Peak GPU memory MB: 4498.375 +2025-08-19 01:33:07 - INFO - [925d037f-7ad8-4811-9a51-bb340e45cf0c] Inference time: 13.31 seconds, CPU usage: 55.6%, CPU core utilization: [40.0, 39.7, 95.5, 47.3] +2025-08-19 01:33:07 - INFO - [925d037f-7ad8-4811-9a51-bb340e45cf0c] Cleaned up temporary file: temp_videos/925d037f-7ad8-4811-9a51-bb340e45cf0c.mp4 +2025-08-19 01:33:07 - INFO - [925d037f-7ad8-4811-9a51-bb340e45cf0c] Cleaned up temporary frame directory: temp_videos/925d037f-7ad8-4811-9a51-bb340e45cf0c +2025-08-19 01:33:07 - INFO - [f43e2bfb-d37c-4f66-8a9f-db3ac3e7ff41] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_091.mp4' +2025-08-19 01:33:07 - INFO - [f43e2bfb-d37c-4f66-8a9f-db3ac3e7ff41] Video saved to temporary file: temp_videos/f43e2bfb-d37c-4f66-8a9f-db3ac3e7ff41.mp4 +2025-08-19 01:33:07 - INFO - [f43e2bfb-d37c-4f66-8a9f-db3ac3e7ff41] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:33:12 - INFO - [f43e2bfb-d37c-4f66-8a9f-db3ac3e7ff41] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:33:12 - INFO - [f43e2bfb-d37c-4f66-8a9f-db3ac3e7ff41] 30 frames saved to temp_videos/f43e2bfb-d37c-4f66-8a9f-db3ac3e7ff41 +2025-08-19 01:33:12 - INFO - Prompt token length: 2276 +2025-08-19 01:33:22 - INFO - Tokens per second: 14.93804804390747, Peak GPU memory MB: 4498.375 +2025-08-19 01:33:22 - INFO - [f43e2bfb-d37c-4f66-8a9f-db3ac3e7ff41] Inference time: 14.68 seconds, CPU usage: 50.5%, CPU core utilization: [44.0, 60.5, 34.3, 63.0] +2025-08-19 01:33:22 - INFO - [f43e2bfb-d37c-4f66-8a9f-db3ac3e7ff41] Cleaned up temporary file: temp_videos/f43e2bfb-d37c-4f66-8a9f-db3ac3e7ff41.mp4 +2025-08-19 01:33:22 - INFO - [f43e2bfb-d37c-4f66-8a9f-db3ac3e7ff41] Cleaned up temporary frame directory: temp_videos/f43e2bfb-d37c-4f66-8a9f-db3ac3e7ff41 +2025-08-19 01:33:22 - INFO - [61d0d8b0-cbd7-4c3b-8214-6cdb7a374098] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_092.mp4' +2025-08-19 01:33:22 - INFO - [61d0d8b0-cbd7-4c3b-8214-6cdb7a374098] Video saved to temporary file: temp_videos/61d0d8b0-cbd7-4c3b-8214-6cdb7a374098.mp4 +2025-08-19 01:33:22 - INFO - [61d0d8b0-cbd7-4c3b-8214-6cdb7a374098] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:33:26 - INFO - [61d0d8b0-cbd7-4c3b-8214-6cdb7a374098] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:33:26 - INFO - [61d0d8b0-cbd7-4c3b-8214-6cdb7a374098] 30 frames saved to temp_videos/61d0d8b0-cbd7-4c3b-8214-6cdb7a374098 +2025-08-19 01:33:26 - INFO - Prompt token length: 2276 +2025-08-19 01:33:32 - INFO - Tokens per second: 15.067549387193225, Peak GPU memory MB: 4498.375 +2025-08-19 01:33:32 - INFO - [61d0d8b0-cbd7-4c3b-8214-6cdb7a374098] Inference time: 9.99 seconds, CPU usage: 54.1%, CPU core utilization: [45.6, 75.5, 38.9, 56.5] +2025-08-19 01:33:32 - INFO - [61d0d8b0-cbd7-4c3b-8214-6cdb7a374098] Cleaned up temporary file: temp_videos/61d0d8b0-cbd7-4c3b-8214-6cdb7a374098.mp4 +2025-08-19 01:33:32 - INFO - [61d0d8b0-cbd7-4c3b-8214-6cdb7a374098] Cleaned up temporary frame directory: temp_videos/61d0d8b0-cbd7-4c3b-8214-6cdb7a374098 +2025-08-19 01:33:32 - INFO - [826a88fd-cbf8-4cf9-b764-6996fd74d6d2] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_093.mp4' +2025-08-19 01:33:32 - INFO - [826a88fd-cbf8-4cf9-b764-6996fd74d6d2] Video saved to temporary file: temp_videos/826a88fd-cbf8-4cf9-b764-6996fd74d6d2.mp4 +2025-08-19 01:33:32 - INFO - [826a88fd-cbf8-4cf9-b764-6996fd74d6d2] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:33:34 - INFO - [826a88fd-cbf8-4cf9-b764-6996fd74d6d2] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:33:34 - INFO - [826a88fd-cbf8-4cf9-b764-6996fd74d6d2] 30 frames saved to temp_videos/826a88fd-cbf8-4cf9-b764-6996fd74d6d2 +2025-08-19 01:33:34 - INFO - Prompt token length: 2276 +2025-08-19 01:33:39 - INFO - Tokens per second: 15.029368119821319, Peak GPU memory MB: 4498.375 +2025-08-19 01:33:39 - INFO - [826a88fd-cbf8-4cf9-b764-6996fd74d6d2] Inference time: 6.94 seconds, CPU usage: 51.0%, CPU core utilization: [34.1, 73.9, 41.2, 54.7] +2025-08-19 01:33:39 - INFO - [826a88fd-cbf8-4cf9-b764-6996fd74d6d2] Cleaned up temporary file: temp_videos/826a88fd-cbf8-4cf9-b764-6996fd74d6d2.mp4 +2025-08-19 01:33:39 - INFO - [826a88fd-cbf8-4cf9-b764-6996fd74d6d2] Cleaned up temporary frame directory: temp_videos/826a88fd-cbf8-4cf9-b764-6996fd74d6d2 +2025-08-19 01:33:39 - INFO - [e7d8aa39-d350-4db9-8832-fe35f5bc0c4b] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_094.mp4' +2025-08-19 01:33:39 - INFO - [e7d8aa39-d350-4db9-8832-fe35f5bc0c4b] Video saved to temporary file: temp_videos/e7d8aa39-d350-4db9-8832-fe35f5bc0c4b.mp4 +2025-08-19 01:33:39 - INFO - [e7d8aa39-d350-4db9-8832-fe35f5bc0c4b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:33:41 - INFO - [e7d8aa39-d350-4db9-8832-fe35f5bc0c4b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:33:41 - INFO - [e7d8aa39-d350-4db9-8832-fe35f5bc0c4b] 30 frames saved to temp_videos/e7d8aa39-d350-4db9-8832-fe35f5bc0c4b +2025-08-19 01:33:41 - INFO - Prompt token length: 2276 +2025-08-19 01:33:47 - INFO - Tokens per second: 15.14670944729439, Peak GPU memory MB: 4498.375 +2025-08-19 01:33:47 - INFO - [e7d8aa39-d350-4db9-8832-fe35f5bc0c4b] Inference time: 8.35 seconds, CPU usage: 45.1%, CPU core utilization: [46.0, 56.7, 46.2, 31.6] +2025-08-19 01:33:47 - INFO - [e7d8aa39-d350-4db9-8832-fe35f5bc0c4b] Cleaned up temporary file: temp_videos/e7d8aa39-d350-4db9-8832-fe35f5bc0c4b.mp4 +2025-08-19 01:33:47 - INFO - [e7d8aa39-d350-4db9-8832-fe35f5bc0c4b] Cleaned up temporary frame directory: temp_videos/e7d8aa39-d350-4db9-8832-fe35f5bc0c4b +2025-08-19 01:33:47 - INFO - [b92d07fb-cafc-4034-ae0e-9e999bf6cac9] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-19 01:33:47 - INFO - [b92d07fb-cafc-4034-ae0e-9e999bf6cac9] Video saved to temporary file: temp_videos/b92d07fb-cafc-4034-ae0e-9e999bf6cac9.mp4 +2025-08-19 01:33:47 - INFO - [b92d07fb-cafc-4034-ae0e-9e999bf6cac9] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 01:33:51 - INFO - [b92d07fb-cafc-4034-ae0e-9e999bf6cac9] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 01:33:51 - INFO - [b92d07fb-cafc-4034-ae0e-9e999bf6cac9] 30 frames saved to temp_videos/b92d07fb-cafc-4034-ae0e-9e999bf6cac9 +2025-08-19 01:33:51 - INFO - Prompt token length: 2276 +2025-08-19 01:34:10 - INFO - Tokens per second: 15.085808154125798, Peak GPU memory MB: 4498.375 +2025-08-19 01:34:10 - INFO - [b92d07fb-cafc-4034-ae0e-9e999bf6cac9] Inference time: 22.59 seconds, CPU usage: 40.6%, CPU core utilization: [42.0, 27.3, 73.2, 19.7] +2025-08-19 01:34:10 - INFO - [b92d07fb-cafc-4034-ae0e-9e999bf6cac9] Cleaned up temporary file: temp_videos/b92d07fb-cafc-4034-ae0e-9e999bf6cac9.mp4 +2025-08-19 01:34:10 - INFO - [b92d07fb-cafc-4034-ae0e-9e999bf6cac9] Cleaned up temporary frame directory: temp_videos/b92d07fb-cafc-4034-ae0e-9e999bf6cac9 diff --git a/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250821_002944.log b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250821_002944.log new file mode 100644 index 0000000000000000000000000000000000000000..53955c0fe83da584802eacb2970d8b740fe02d3d --- /dev/null +++ b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250821_002944.log @@ -0,0 +1,94 @@ +2025-08-21 00:29:44 - INFO - Loading model: Qwen/Qwen2-VL-2B-Instruct-AWQ +2025-08-21 00:29:48 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-21 00:30:16 - INFO - Model loaded in 32.31 seconds +2025-08-21 00:30:16 - INFO - GPU Memory Usage after model load: 2369.47 MB +2025-08-21 00:30:22 - INFO - [473c7e67-59f4-4a5d-8868-f714f9787e83] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. 
Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_001.mp4' +2025-08-21 00:30:22 - INFO - [473c7e67-59f4-4a5d-8868-f714f9787e83] Video saved to temporary file: temp_videos/473c7e67-59f4-4a5d-8868-f714f9787e83.mp4 +2025-08-21 00:30:22 - INFO - [473c7e67-59f4-4a5d-8868-f714f9787e83] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:30:27 - INFO - [473c7e67-59f4-4a5d-8868-f714f9787e83] Extracted 30 frames successfully. Saving to temporary files... +2025-08-21 00:30:27 - INFO - [473c7e67-59f4-4a5d-8868-f714f9787e83] 30 frames saved to temp_videos/473c7e67-59f4-4a5d-8868-f714f9787e83 +2025-08-21 00:30:28 - INFO - Prompt token length: 2306 +2025-08-21 00:30:42 - INFO - Tokens per second: 15.126569543378265, Peak GPU memory MB: 4514.375 +2025-08-21 00:30:42 - INFO - [473c7e67-59f4-4a5d-8868-f714f9787e83] Inference time: 19.59 seconds, CPU usage: 28.8%, CPU core utilization: [28.0, 25.1, 38.9, 23.2] +2025-08-21 00:30:42 - INFO - [473c7e67-59f4-4a5d-8868-f714f9787e83] Cleaned up temporary frame directory: temp_videos/473c7e67-59f4-4a5d-8868-f714f9787e83 +2025-08-21 00:30:42 - INFO - [8695ced7-6ed8-4f84-8fd3-a5645e83398c] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_002.mp4' +2025-08-21 00:30:42 - INFO - [8695ced7-6ed8-4f84-8fd3-a5645e83398c] Video saved to temporary file: temp_videos/8695ced7-6ed8-4f84-8fd3-a5645e83398c.mp4 +2025-08-21 00:30:42 - INFO - [8695ced7-6ed8-4f84-8fd3-a5645e83398c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:30:47 - INFO - [8695ced7-6ed8-4f84-8fd3-a5645e83398c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:30:47 - INFO - [8695ced7-6ed8-4f84-8fd3-a5645e83398c] 30 frames saved to temp_videos/8695ced7-6ed8-4f84-8fd3-a5645e83398c +2025-08-21 00:30:47 - INFO - Prompt token length: 2306 +2025-08-21 00:30:58 - INFO - Tokens per second: 15.30312036179949, Peak GPU memory MB: 4514.375 +2025-08-21 00:30:58 - INFO - [8695ced7-6ed8-4f84-8fd3-a5645e83398c] Inference time: 15.90 seconds, CPU usage: 45.5%, CPU core utilization: [81.9, 29.9, 42.4, 27.9] +2025-08-21 00:30:58 - INFO - [8695ced7-6ed8-4f84-8fd3-a5645e83398c] Cleaned up temporary frame directory: temp_videos/8695ced7-6ed8-4f84-8fd3-a5645e83398c +2025-08-21 00:30:58 - INFO - [a8ea642a-4c80-4dfc-a0b6-6e9f4cf8be02] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_003.mp4' +2025-08-21 00:30:58 - INFO - [a8ea642a-4c80-4dfc-a0b6-6e9f4cf8be02] Video saved to temporary file: temp_videos/a8ea642a-4c80-4dfc-a0b6-6e9f4cf8be02.mp4 +2025-08-21 00:30:58 - INFO - [a8ea642a-4c80-4dfc-a0b6-6e9f4cf8be02] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:31:03 - INFO - [a8ea642a-4c80-4dfc-a0b6-6e9f4cf8be02] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:31:03 - INFO - [a8ea642a-4c80-4dfc-a0b6-6e9f4cf8be02] 30 frames saved to temp_videos/a8ea642a-4c80-4dfc-a0b6-6e9f4cf8be02 +2025-08-21 00:31:03 - INFO - Prompt token length: 2306 +2025-08-21 00:31:14 - INFO - Tokens per second: 15.081962195610863, Peak GPU memory MB: 4514.375 +2025-08-21 00:31:14 - INFO - [a8ea642a-4c80-4dfc-a0b6-6e9f4cf8be02] Inference time: 15.82 seconds, CPU usage: 46.3%, CPU core utilization: [66.8, 30.9, 58.6, 29.0] +2025-08-21 00:31:14 - INFO - [a8ea642a-4c80-4dfc-a0b6-6e9f4cf8be02] Cleaned up temporary frame directory: temp_videos/a8ea642a-4c80-4dfc-a0b6-6e9f4cf8be02 +2025-08-21 00:31:14 - INFO - [127051a2-002e-4513-af2a-b168f47a679c] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_004.mp4' +2025-08-21 00:31:14 - INFO - [127051a2-002e-4513-af2a-b168f47a679c] Video saved to temporary file: temp_videos/127051a2-002e-4513-af2a-b168f47a679c.mp4 +2025-08-21 00:31:14 - INFO - [127051a2-002e-4513-af2a-b168f47a679c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:31:19 - INFO - [127051a2-002e-4513-af2a-b168f47a679c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:31:19 - INFO - [127051a2-002e-4513-af2a-b168f47a679c] 30 frames saved to temp_videos/127051a2-002e-4513-af2a-b168f47a679c +2025-08-21 00:31:19 - INFO - Prompt token length: 2306 +2025-08-21 00:31:30 - INFO - Tokens per second: 15.1012932923201, Peak GPU memory MB: 4514.375 +2025-08-21 00:31:30 - INFO - [127051a2-002e-4513-af2a-b168f47a679c] Inference time: 15.92 seconds, CPU usage: 44.7%, CPU core utilization: [29.3, 28.1, 27.0, 94.3] +2025-08-21 00:31:30 - INFO - [127051a2-002e-4513-af2a-b168f47a679c] Cleaned up temporary frame directory: temp_videos/127051a2-002e-4513-af2a-b168f47a679c +2025-08-21 00:31:30 - INFO - [a3389ddf-4af5-4921-a824-0c0c8b4ff137] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_005.mp4' +2025-08-21 00:31:30 - INFO - [a3389ddf-4af5-4921-a824-0c0c8b4ff137] Video saved to temporary file: temp_videos/a3389ddf-4af5-4921-a824-0c0c8b4ff137.mp4 +2025-08-21 00:31:30 - INFO - [a3389ddf-4af5-4921-a824-0c0c8b4ff137] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:31:35 - INFO - [a3389ddf-4af5-4921-a824-0c0c8b4ff137] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:31:35 - INFO - [a3389ddf-4af5-4921-a824-0c0c8b4ff137] 30 frames saved to temp_videos/a3389ddf-4af5-4921-a824-0c0c8b4ff137 +2025-08-21 00:31:35 - INFO - Prompt token length: 2306 +2025-08-21 00:31:42 - INFO - Tokens per second: 14.916020163544944, Peak GPU memory MB: 4514.375 +2025-08-21 00:31:42 - INFO - [a3389ddf-4af5-4921-a824-0c0c8b4ff137] Inference time: 12.32 seconds, CPU usage: 50.3%, CPU core utilization: [37.0, 57.7, 34.8, 71.6] +2025-08-21 00:31:42 - INFO - [a3389ddf-4af5-4921-a824-0c0c8b4ff137] Cleaned up temporary frame directory: temp_videos/a3389ddf-4af5-4921-a824-0c0c8b4ff137 +2025-08-21 00:31:42 - INFO - [3a1249cb-f47c-4dab-916a-8e74dfe771cd] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_006.mp4' +2025-08-21 00:31:42 - INFO - [3a1249cb-f47c-4dab-916a-8e74dfe771cd] Video saved to temporary file: temp_videos/3a1249cb-f47c-4dab-916a-8e74dfe771cd.mp4 +2025-08-21 00:31:42 - INFO - [3a1249cb-f47c-4dab-916a-8e74dfe771cd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:31:47 - INFO - [3a1249cb-f47c-4dab-916a-8e74dfe771cd] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:31:47 - INFO - [3a1249cb-f47c-4dab-916a-8e74dfe771cd] 30 frames saved to temp_videos/3a1249cb-f47c-4dab-916a-8e74dfe771cd +2025-08-21 00:31:47 - INFO - Prompt token length: 2306 +2025-08-21 00:31:58 - INFO - Tokens per second: 15.137056054618776, Peak GPU memory MB: 4514.375 +2025-08-21 00:31:58 - INFO - [3a1249cb-f47c-4dab-916a-8e74dfe771cd] Inference time: 16.42 seconds, CPU usage: 44.8%, CPU core utilization: [31.9, 78.8, 27.5, 41.0] +2025-08-21 00:31:58 - INFO - [3a1249cb-f47c-4dab-916a-8e74dfe771cd] Cleaned up temporary frame directory: temp_videos/3a1249cb-f47c-4dab-916a-8e74dfe771cd +2025-08-21 00:31:58 - INFO - [aeccbc49-b47d-4bb4-a28f-ce86d255d26e] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_007.mp4' +2025-08-21 00:31:58 - INFO - [aeccbc49-b47d-4bb4-a28f-ce86d255d26e] Video saved to temporary file: temp_videos/aeccbc49-b47d-4bb4-a28f-ce86d255d26e.mp4 +2025-08-21 00:31:58 - INFO - [aeccbc49-b47d-4bb4-a28f-ce86d255d26e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:32:03 - INFO - [aeccbc49-b47d-4bb4-a28f-ce86d255d26e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:32:03 - INFO - [aeccbc49-b47d-4bb4-a28f-ce86d255d26e] 30 frames saved to temp_videos/aeccbc49-b47d-4bb4-a28f-ce86d255d26e +2025-08-21 00:32:03 - INFO - Prompt token length: 2306 +2025-08-21 00:32:11 - INFO - Tokens per second: 15.146565978743613, Peak GPU memory MB: 4514.375 +2025-08-21 00:32:11 - INFO - [aeccbc49-b47d-4bb4-a28f-ce86d255d26e] Inference time: 12.69 seconds, CPU usage: 50.4%, CPU core utilization: [35.0, 35.3, 94.0, 37.2] +2025-08-21 00:32:11 - INFO - [aeccbc49-b47d-4bb4-a28f-ce86d255d26e] Cleaned up temporary frame directory: temp_videos/aeccbc49-b47d-4bb4-a28f-ce86d255d26e +2025-08-21 00:32:11 - INFO - [56ab2c6c-91a0-4a2b-8e6e-30b409eabd5e] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_008.mp4' +2025-08-21 00:32:11 - INFO - [56ab2c6c-91a0-4a2b-8e6e-30b409eabd5e] Video saved to temporary file: temp_videos/56ab2c6c-91a0-4a2b-8e6e-30b409eabd5e.mp4 +2025-08-21 00:32:11 - INFO - [56ab2c6c-91a0-4a2b-8e6e-30b409eabd5e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:32:16 - INFO - [56ab2c6c-91a0-4a2b-8e6e-30b409eabd5e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:32:16 - INFO - [56ab2c6c-91a0-4a2b-8e6e-30b409eabd5e] 30 frames saved to temp_videos/56ab2c6c-91a0-4a2b-8e6e-30b409eabd5e +2025-08-21 00:32:16 - INFO - Prompt token length: 2306 +2025-08-21 00:32:21 - INFO - Tokens per second: 15.105367104028009, Peak GPU memory MB: 4514.375 +2025-08-21 00:32:21 - INFO - [56ab2c6c-91a0-4a2b-8e6e-30b409eabd5e] Inference time: 9.88 seconds, CPU usage: 56.6%, CPU core utilization: [44.1, 58.8, 44.6, 78.6] +2025-08-21 00:32:21 - INFO - [56ab2c6c-91a0-4a2b-8e6e-30b409eabd5e] Cleaned up temporary frame directory: temp_videos/56ab2c6c-91a0-4a2b-8e6e-30b409eabd5e +2025-08-21 00:32:21 - INFO - [2b2cfccd-ea6e-47e1-a1be-cffa74ba8d17] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_009.mp4' +2025-08-21 00:32:21 - INFO - [2b2cfccd-ea6e-47e1-a1be-cffa74ba8d17] Video saved to temporary file: temp_videos/2b2cfccd-ea6e-47e1-a1be-cffa74ba8d17.mp4 +2025-08-21 00:32:21 - INFO - [2b2cfccd-ea6e-47e1-a1be-cffa74ba8d17] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:32:26 - INFO - [2b2cfccd-ea6e-47e1-a1be-cffa74ba8d17] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:32:26 - INFO - [2b2cfccd-ea6e-47e1-a1be-cffa74ba8d17] 30 frames saved to temp_videos/2b2cfccd-ea6e-47e1-a1be-cffa74ba8d17 +2025-08-21 00:32:26 - INFO - Prompt token length: 2306 +2025-08-21 00:32:31 - INFO - Tokens per second: 15.121278187118696, Peak GPU memory MB: 4514.375 +2025-08-21 00:32:31 - INFO - [2b2cfccd-ea6e-47e1-a1be-cffa74ba8d17] Inference time: 9.51 seconds, CPU usage: 57.4%, CPU core utilization: [80.4, 45.2, 47.0, 57.0] +2025-08-21 00:32:31 - INFO - [2b2cfccd-ea6e-47e1-a1be-cffa74ba8d17] Cleaned up temporary frame directory: temp_videos/2b2cfccd-ea6e-47e1-a1be-cffa74ba8d17 +2025-08-21 00:32:31 - INFO - [ccb3ba8f-6c3e-4301-884e-8a28f8f4cac3] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_010.mp4' +2025-08-21 00:32:31 - INFO - [ccb3ba8f-6c3e-4301-884e-8a28f8f4cac3] Video saved to temporary file: temp_videos/ccb3ba8f-6c3e-4301-884e-8a28f8f4cac3.mp4 +2025-08-21 00:32:31 - INFO - [ccb3ba8f-6c3e-4301-884e-8a28f8f4cac3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:32:35 - INFO - [ccb3ba8f-6c3e-4301-884e-8a28f8f4cac3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:32:35 - INFO - [ccb3ba8f-6c3e-4301-884e-8a28f8f4cac3] 30 frames saved to temp_videos/ccb3ba8f-6c3e-4301-884e-8a28f8f4cac3 +2025-08-21 00:32:36 - INFO - Prompt token length: 2306 +2025-08-21 00:32:54 - INFO - Tokens per second: 15.20172275516238, Peak GPU memory MB: 4514.375 +2025-08-21 00:32:54 - INFO - [ccb3ba8f-6c3e-4301-884e-8a28f8f4cac3] Inference time: 23.66 seconds, CPU usage: 39.3%, CPU core utilization: [43.6, 46.9, 30.6, 36.0] +2025-08-21 00:32:54 - INFO - [ccb3ba8f-6c3e-4301-884e-8a28f8f4cac3] Cleaned up temporary frame directory: temp_videos/ccb3ba8f-6c3e-4301-884e-8a28f8f4cac3 diff --git a/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250821_013207.log b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250821_013207.log new file mode 100644 index 0000000000000000000000000000000000000000..cf95e959884d2b37bca30081567d6e701fa5eb96 --- /dev/null +++ b/API_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250821_013207.log @@ -0,0 +1,148 @@ +2025-08-21 01:32:07 - INFO - Loading model: Qwen/Qwen2-VL-2B-Instruct-AWQ +2025-08-21 01:32:11 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-21 01:32:38 - INFO - Model loaded in 31.45 seconds +2025-08-21 01:32:38 - INFO - GPU Memory Usage after model load: 2369.47 MB +2025-08-21 01:32:48 - INFO - [6806d96b-50d0-41d5-8703-320d06e1bb84] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. 
Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_001.mp4' +2025-08-21 01:32:48 - INFO - [6806d96b-50d0-41d5-8703-320d06e1bb84] Video saved to temporary file: temp_videos/6806d96b-50d0-41d5-8703-320d06e1bb84.mp4 +2025-08-21 01:32:48 - INFO - [6806d96b-50d0-41d5-8703-320d06e1bb84] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:32:52 - INFO - [6806d96b-50d0-41d5-8703-320d06e1bb84] Extracted 30 frames successfully. Saving to temporary files... +2025-08-21 01:32:52 - INFO - [6806d96b-50d0-41d5-8703-320d06e1bb84] 30 frames saved to temp_videos/6806d96b-50d0-41d5-8703-320d06e1bb84 +2025-08-21 01:32:52 - INFO - Prompt token length: 2306 +2025-08-21 01:33:04 - INFO - Tokens per second: 14.857358494588418, Peak GPU memory MB: 4514.375 +2025-08-21 01:33:04 - INFO - [6806d96b-50d0-41d5-8703-320d06e1bb84] Inference time: 15.50 seconds, CPU usage: 32.9%, CPU core utilization: [32.8, 28.6, 29.2, 41.1] +2025-08-21 01:33:04 - INFO - [6806d96b-50d0-41d5-8703-320d06e1bb84] Cleaned up temporary frame directory: temp_videos/6806d96b-50d0-41d5-8703-320d06e1bb84 +2025-08-21 01:33:04 - INFO - [5cb2e558-a2e2-4495-b40b-5f785967226f] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_002.mp4' +2025-08-21 01:33:04 - INFO - [5cb2e558-a2e2-4495-b40b-5f785967226f] Video saved to temporary file: temp_videos/5cb2e558-a2e2-4495-b40b-5f785967226f.mp4 +2025-08-21 01:33:04 - INFO - [5cb2e558-a2e2-4495-b40b-5f785967226f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:33:07 - INFO - [5cb2e558-a2e2-4495-b40b-5f785967226f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:33:07 - INFO - [5cb2e558-a2e2-4495-b40b-5f785967226f] 30 frames saved to temp_videos/5cb2e558-a2e2-4495-b40b-5f785967226f +2025-08-21 01:33:08 - INFO - Prompt token length: 2306 +2025-08-21 01:33:17 - INFO - Tokens per second: 14.913712045394723, Peak GPU memory MB: 4514.375 +2025-08-21 01:33:17 - INFO - [5cb2e558-a2e2-4495-b40b-5f785967226f] Inference time: 12.93 seconds, CPU usage: 42.1%, CPU core utilization: [24.8, 42.8, 25.7, 75.0] +2025-08-21 01:33:17 - INFO - [5cb2e558-a2e2-4495-b40b-5f785967226f] Cleaned up temporary frame directory: temp_videos/5cb2e558-a2e2-4495-b40b-5f785967226f +2025-08-21 01:33:17 - INFO - [515f2d40-6c02-40e7-b489-254d66061d58] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_003.mp4' +2025-08-21 01:33:17 - INFO - [515f2d40-6c02-40e7-b489-254d66061d58] Video saved to temporary file: temp_videos/515f2d40-6c02-40e7-b489-254d66061d58.mp4 +2025-08-21 01:33:17 - INFO - [515f2d40-6c02-40e7-b489-254d66061d58] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:33:20 - INFO - [515f2d40-6c02-40e7-b489-254d66061d58] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:33:20 - INFO - [515f2d40-6c02-40e7-b489-254d66061d58] 30 frames saved to temp_videos/515f2d40-6c02-40e7-b489-254d66061d58 +2025-08-21 01:33:20 - INFO - Prompt token length: 2306 +2025-08-21 01:33:39 - INFO - Tokens per second: 15.212866049846783, Peak GPU memory MB: 4514.375 +2025-08-21 01:33:39 - INFO - [515f2d40-6c02-40e7-b489-254d66061d58] Inference time: 22.18 seconds, CPU usage: 35.0%, CPU core utilization: [28.2, 14.0, 81.7, 16.0] +2025-08-21 01:33:39 - INFO - [515f2d40-6c02-40e7-b489-254d66061d58] Cleaned up temporary frame directory: temp_videos/515f2d40-6c02-40e7-b489-254d66061d58 +2025-08-21 01:33:39 - INFO - [7702f18f-4562-4928-bacd-861b024219c1] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_004.mp4' +2025-08-21 01:33:39 - INFO - [7702f18f-4562-4928-bacd-861b024219c1] Video saved to temporary file: temp_videos/7702f18f-4562-4928-bacd-861b024219c1.mp4 +2025-08-21 01:33:39 - INFO - [7702f18f-4562-4928-bacd-861b024219c1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:33:43 - INFO - [7702f18f-4562-4928-bacd-861b024219c1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:33:43 - INFO - [7702f18f-4562-4928-bacd-861b024219c1] 30 frames saved to temp_videos/7702f18f-4562-4928-bacd-861b024219c1 +2025-08-21 01:33:43 - INFO - Prompt token length: 2306 +2025-08-21 01:33:51 - INFO - Tokens per second: 14.729804011346738, Peak GPU memory MB: 4514.375 +2025-08-21 01:33:51 - INFO - [7702f18f-4562-4928-bacd-861b024219c1] Inference time: 11.51 seconds, CPU usage: 44.6%, CPU core utilization: [43.6, 28.0, 27.7, 78.9] +2025-08-21 01:33:51 - INFO - [7702f18f-4562-4928-bacd-861b024219c1] Cleaned up temporary frame directory: temp_videos/7702f18f-4562-4928-bacd-861b024219c1 +2025-08-21 01:33:51 - INFO - [14252659-b5fb-4fa7-8d3e-f62a3c69679b] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_005.mp4' +2025-08-21 01:33:51 - INFO - [14252659-b5fb-4fa7-8d3e-f62a3c69679b] Video saved to temporary file: temp_videos/14252659-b5fb-4fa7-8d3e-f62a3c69679b.mp4 +2025-08-21 01:33:51 - INFO - [14252659-b5fb-4fa7-8d3e-f62a3c69679b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:33:54 - INFO - [14252659-b5fb-4fa7-8d3e-f62a3c69679b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:33:54 - INFO - [14252659-b5fb-4fa7-8d3e-f62a3c69679b] 30 frames saved to temp_videos/14252659-b5fb-4fa7-8d3e-f62a3c69679b +2025-08-21 01:33:54 - INFO - Prompt token length: 2306 +2025-08-21 01:34:03 - INFO - Tokens per second: 15.087484052694805, Peak GPU memory MB: 4514.375 +2025-08-21 01:34:03 - INFO - [14252659-b5fb-4fa7-8d3e-f62a3c69679b] Inference time: 12.10 seconds, CPU usage: 41.3%, CPU core utilization: [37.5, 35.4, 68.2, 24.2] +2025-08-21 01:34:03 - INFO - [14252659-b5fb-4fa7-8d3e-f62a3c69679b] Cleaned up temporary frame directory: temp_videos/14252659-b5fb-4fa7-8d3e-f62a3c69679b +2025-08-21 01:34:03 - INFO - [f08308a4-d0e5-4d3f-ade5-4a3517c11659] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_006.mp4' +2025-08-21 01:34:03 - INFO - [f08308a4-d0e5-4d3f-ade5-4a3517c11659] Video saved to temporary file: temp_videos/f08308a4-d0e5-4d3f-ade5-4a3517c11659.mp4 +2025-08-21 01:34:03 - INFO - [f08308a4-d0e5-4d3f-ade5-4a3517c11659] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:34:06 - INFO - [f08308a4-d0e5-4d3f-ade5-4a3517c11659] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:34:06 - INFO - [f08308a4-d0e5-4d3f-ade5-4a3517c11659] 30 frames saved to temp_videos/f08308a4-d0e5-4d3f-ade5-4a3517c11659 +2025-08-21 01:34:06 - INFO - Prompt token length: 2306 +2025-08-21 01:34:17 - INFO - Tokens per second: 14.952252033094313, Peak GPU memory MB: 4514.375 +2025-08-21 01:34:17 - INFO - [f08308a4-d0e5-4d3f-ade5-4a3517c11659] Inference time: 14.60 seconds, CPU usage: 40.3%, CPU core utilization: [21.8, 22.2, 22.3, 94.7] +2025-08-21 01:34:17 - INFO - [f08308a4-d0e5-4d3f-ade5-4a3517c11659] Cleaned up temporary frame directory: temp_videos/f08308a4-d0e5-4d3f-ade5-4a3517c11659 +2025-08-21 01:34:17 - INFO - [5671a395-356d-43ae-9464-5fc071986b0e] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_007.mp4' +2025-08-21 01:34:17 - INFO - [5671a395-356d-43ae-9464-5fc071986b0e] Video saved to temporary file: temp_videos/5671a395-356d-43ae-9464-5fc071986b0e.mp4 +2025-08-21 01:34:17 - INFO - [5671a395-356d-43ae-9464-5fc071986b0e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:34:21 - INFO - [5671a395-356d-43ae-9464-5fc071986b0e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:34:21 - INFO - [5671a395-356d-43ae-9464-5fc071986b0e] 30 frames saved to temp_videos/5671a395-356d-43ae-9464-5fc071986b0e +2025-08-21 01:34:21 - INFO - Prompt token length: 2306 +2025-08-21 01:34:29 - INFO - Tokens per second: 14.96244510342586, Peak GPU memory MB: 4514.375 +2025-08-21 01:34:29 - INFO - [5671a395-356d-43ae-9464-5fc071986b0e] Inference time: 11.52 seconds, CPU usage: 42.8%, CPU core utilization: [33.2, 59.1, 33.1, 46.1] +2025-08-21 01:34:29 - INFO - [5671a395-356d-43ae-9464-5fc071986b0e] Cleaned up temporary frame directory: temp_videos/5671a395-356d-43ae-9464-5fc071986b0e +2025-08-21 01:34:29 - INFO - [09f8c5e5-7851-4ff6-85c5-fd9a0ad9d11f] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_008.mp4' +2025-08-21 01:34:29 - INFO - [09f8c5e5-7851-4ff6-85c5-fd9a0ad9d11f] Video saved to temporary file: temp_videos/09f8c5e5-7851-4ff6-85c5-fd9a0ad9d11f.mp4 +2025-08-21 01:34:29 - INFO - [09f8c5e5-7851-4ff6-85c5-fd9a0ad9d11f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:34:32 - INFO - [09f8c5e5-7851-4ff6-85c5-fd9a0ad9d11f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:34:32 - INFO - [09f8c5e5-7851-4ff6-85c5-fd9a0ad9d11f] 30 frames saved to temp_videos/09f8c5e5-7851-4ff6-85c5-fd9a0ad9d11f +2025-08-21 01:34:33 - INFO - Prompt token length: 2306 +2025-08-21 01:34:43 - INFO - Tokens per second: 15.042997648777325, Peak GPU memory MB: 4514.375 +2025-08-21 01:34:43 - INFO - [09f8c5e5-7851-4ff6-85c5-fd9a0ad9d11f] Inference time: 13.92 seconds, CPU usage: 40.3%, CPU core utilization: [35.6, 54.7, 22.9, 47.8] +2025-08-21 01:34:43 - INFO - [09f8c5e5-7851-4ff6-85c5-fd9a0ad9d11f] Cleaned up temporary frame directory: temp_videos/09f8c5e5-7851-4ff6-85c5-fd9a0ad9d11f +2025-08-21 01:34:43 - INFO - [9110b0b9-0870-40fe-bfa9-fde4a5519eeb] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_009.mp4' +2025-08-21 01:34:43 - INFO - [9110b0b9-0870-40fe-bfa9-fde4a5519eeb] Video saved to temporary file: temp_videos/9110b0b9-0870-40fe-bfa9-fde4a5519eeb.mp4 +2025-08-21 01:34:43 - INFO - [9110b0b9-0870-40fe-bfa9-fde4a5519eeb] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:34:46 - INFO - [9110b0b9-0870-40fe-bfa9-fde4a5519eeb] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:34:46 - INFO - [9110b0b9-0870-40fe-bfa9-fde4a5519eeb] 30 frames saved to temp_videos/9110b0b9-0870-40fe-bfa9-fde4a5519eeb +2025-08-21 01:34:46 - INFO - Prompt token length: 2306 +2025-08-21 01:35:05 - INFO - Tokens per second: 15.022627137863786, Peak GPU memory MB: 4514.375 +2025-08-21 01:35:05 - INFO - [9110b0b9-0870-40fe-bfa9-fde4a5519eeb] Inference time: 22.48 seconds, CPU usage: 35.1%, CPU core utilization: [14.0, 57.8, 15.6, 53.2] +2025-08-21 01:35:05 - INFO - [9110b0b9-0870-40fe-bfa9-fde4a5519eeb] Cleaned up temporary frame directory: temp_videos/9110b0b9-0870-40fe-bfa9-fde4a5519eeb +2025-08-21 01:35:05 - INFO - [a8af5915-754a-4c20-8eed-e7dc0e54633d] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_010.mp4' +2025-08-21 01:35:05 - INFO - [a8af5915-754a-4c20-8eed-e7dc0e54633d] Video saved to temporary file: temp_videos/a8af5915-754a-4c20-8eed-e7dc0e54633d.mp4 +2025-08-21 01:35:05 - INFO - [a8af5915-754a-4c20-8eed-e7dc0e54633d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:35:09 - INFO - [a8af5915-754a-4c20-8eed-e7dc0e54633d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:35:09 - INFO - [a8af5915-754a-4c20-8eed-e7dc0e54633d] 30 frames saved to temp_videos/a8af5915-754a-4c20-8eed-e7dc0e54633d +2025-08-21 01:35:09 - INFO - Prompt token length: 2306 +2025-08-21 01:35:16 - INFO - Tokens per second: 14.923263484191663, Peak GPU memory MB: 4514.375 +2025-08-21 01:35:16 - INFO - [a8af5915-754a-4c20-8eed-e7dc0e54633d] Inference time: 10.32 seconds, CPU usage: 45.6%, CPU core utilization: [35.4, 79.2, 32.6, 35.0] +2025-08-21 01:35:16 - INFO - [a8af5915-754a-4c20-8eed-e7dc0e54633d] Cleaned up temporary frame directory: temp_videos/a8af5915-754a-4c20-8eed-e7dc0e54633d +2025-08-21 01:35:16 - INFO - [a45614c0-df7a-4c35-a1ea-1efa6a29a8d8] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_011.mp4' +2025-08-21 01:35:16 - INFO - [a45614c0-df7a-4c35-a1ea-1efa6a29a8d8] Video saved to temporary file: temp_videos/a45614c0-df7a-4c35-a1ea-1efa6a29a8d8.mp4 +2025-08-21 01:35:16 - INFO - [a45614c0-df7a-4c35-a1ea-1efa6a29a8d8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:35:19 - INFO - [a45614c0-df7a-4c35-a1ea-1efa6a29a8d8] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:35:19 - INFO - [a45614c0-df7a-4c35-a1ea-1efa6a29a8d8] 30 frames saved to temp_videos/a45614c0-df7a-4c35-a1ea-1efa6a29a8d8 +2025-08-21 01:35:19 - INFO - Prompt token length: 2306 +2025-08-21 01:35:27 - INFO - Tokens per second: 15.164410207215045, Peak GPU memory MB: 4514.375 +2025-08-21 01:35:27 - INFO - [a45614c0-df7a-4c35-a1ea-1efa6a29a8d8] Inference time: 10.98 seconds, CPU usage: 44.3%, CPU core utilization: [88.7, 28.9, 33.2, 26.5] +2025-08-21 01:35:27 - INFO - [a45614c0-df7a-4c35-a1ea-1efa6a29a8d8] Cleaned up temporary frame directory: temp_videos/a45614c0-df7a-4c35-a1ea-1efa6a29a8d8 +2025-08-21 01:35:27 - INFO - [8156334c-d671-4483-b468-863d84a26687] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_012.mp4' +2025-08-21 01:35:27 - INFO - [8156334c-d671-4483-b468-863d84a26687] Video saved to temporary file: temp_videos/8156334c-d671-4483-b468-863d84a26687.mp4 +2025-08-21 01:35:27 - INFO - [8156334c-d671-4483-b468-863d84a26687] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:35:30 - INFO - [8156334c-d671-4483-b468-863d84a26687] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:35:30 - INFO - [8156334c-d671-4483-b468-863d84a26687] 30 frames saved to temp_videos/8156334c-d671-4483-b468-863d84a26687 +2025-08-21 01:35:30 - INFO - Prompt token length: 2306 +2025-08-21 01:35:49 - INFO - Tokens per second: 15.117784722711388, Peak GPU memory MB: 4514.375 +2025-08-21 01:35:49 - INFO - [8156334c-d671-4483-b468-863d84a26687] Inference time: 22.36 seconds, CPU usage: 34.6%, CPU core utilization: [24.1, 60.2, 15.1, 38.9] +2025-08-21 01:35:49 - INFO - [8156334c-d671-4483-b468-863d84a26687] Cleaned up temporary frame directory: temp_videos/8156334c-d671-4483-b468-863d84a26687 +2025-08-21 01:35:49 - INFO - [58827a98-0a85-4ee4-8240-b10420154270] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_013.mp4' +2025-08-21 01:35:49 - INFO - [58827a98-0a85-4ee4-8240-b10420154270] Video saved to temporary file: temp_videos/58827a98-0a85-4ee4-8240-b10420154270.mp4 +2025-08-21 01:35:49 - INFO - [58827a98-0a85-4ee4-8240-b10420154270] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:35:52 - INFO - [58827a98-0a85-4ee4-8240-b10420154270] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:35:52 - INFO - [58827a98-0a85-4ee4-8240-b10420154270] 30 frames saved to temp_videos/58827a98-0a85-4ee4-8240-b10420154270 +2025-08-21 01:35:53 - INFO - Prompt token length: 2306 +2025-08-21 01:36:00 - INFO - Tokens per second: 14.985813897711381, Peak GPU memory MB: 4514.375 +2025-08-21 01:36:00 - INFO - [58827a98-0a85-4ee4-8240-b10420154270] Inference time: 11.17 seconds, CPU usage: 41.0%, CPU core utilization: [25.1, 70.6, 23.8, 44.4] +2025-08-21 01:36:00 - INFO - [58827a98-0a85-4ee4-8240-b10420154270] Cleaned up temporary frame directory: temp_videos/58827a98-0a85-4ee4-8240-b10420154270 +2025-08-21 01:36:00 - INFO - [cc8da7cb-4ffc-4400-a354-fe30fac0dc25] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_014.mp4' +2025-08-21 01:36:00 - INFO - [cc8da7cb-4ffc-4400-a354-fe30fac0dc25] Video saved to temporary file: temp_videos/cc8da7cb-4ffc-4400-a354-fe30fac0dc25.mp4 +2025-08-21 01:36:00 - INFO - [cc8da7cb-4ffc-4400-a354-fe30fac0dc25] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:36:04 - INFO - [cc8da7cb-4ffc-4400-a354-fe30fac0dc25] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:36:04 - INFO - [cc8da7cb-4ffc-4400-a354-fe30fac0dc25] 30 frames saved to temp_videos/cc8da7cb-4ffc-4400-a354-fe30fac0dc25 +2025-08-21 01:36:04 - INFO - Prompt token length: 2306 +2025-08-21 01:36:12 - INFO - Tokens per second: 15.019531662577604, Peak GPU memory MB: 4514.375 +2025-08-21 01:36:12 - INFO - [cc8da7cb-4ffc-4400-a354-fe30fac0dc25] Inference time: 12.21 seconds, CPU usage: 40.9%, CPU core utilization: [58.5, 47.3, 33.7, 24.0] +2025-08-21 01:36:12 - INFO - [cc8da7cb-4ffc-4400-a354-fe30fac0dc25] Cleaned up temporary frame directory: temp_videos/cc8da7cb-4ffc-4400-a354-fe30fac0dc25 +2025-08-21 01:36:12 - INFO - [95265fda-7544-4393-a928-5411d89f8f51] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_015.mp4' +2025-08-21 01:36:12 - INFO - [95265fda-7544-4393-a928-5411d89f8f51] Video saved to temporary file: temp_videos/95265fda-7544-4393-a928-5411d89f8f51.mp4 +2025-08-21 01:36:12 - INFO - [95265fda-7544-4393-a928-5411d89f8f51] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:36:16 - INFO - [95265fda-7544-4393-a928-5411d89f8f51] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:36:16 - INFO - [95265fda-7544-4393-a928-5411d89f8f51] 30 frames saved to temp_videos/95265fda-7544-4393-a928-5411d89f8f51 +2025-08-21 01:36:16 - INFO - Prompt token length: 2306 +2025-08-21 01:36:23 - INFO - Tokens per second: 14.90843712868291, Peak GPU memory MB: 4514.375 +2025-08-21 01:36:23 - INFO - [95265fda-7544-4393-a928-5411d89f8f51] Inference time: 10.51 seconds, CPU usage: 42.4%, CPU core utilization: [33.2, 26.3, 25.8, 84.0] +2025-08-21 01:36:23 - INFO - [95265fda-7544-4393-a928-5411d89f8f51] Cleaned up temporary frame directory: temp_videos/95265fda-7544-4393-a928-5411d89f8f51 +2025-08-21 01:36:23 - INFO - [5425fe3f-264e-4b86-b655-903ec4f4ef2e] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_016.mp4' +2025-08-21 01:36:23 - INFO - [5425fe3f-264e-4b86-b655-903ec4f4ef2e] Video saved to temporary file: temp_videos/5425fe3f-264e-4b86-b655-903ec4f4ef2e.mp4 +2025-08-21 01:36:23 - INFO - [5425fe3f-264e-4b86-b655-903ec4f4ef2e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 01:36:26 - INFO - [5425fe3f-264e-4b86-b655-903ec4f4ef2e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 01:36:26 - INFO - [5425fe3f-264e-4b86-b655-903ec4f4ef2e] 30 frames saved to temp_videos/5425fe3f-264e-4b86-b655-903ec4f4ef2e +2025-08-21 01:36:27 - INFO - Prompt token length: 2306 +2025-08-21 01:36:34 - INFO - Tokens per second: 15.01252738973886, Peak GPU memory MB: 4514.375 +2025-08-21 01:36:34 - INFO - [5425fe3f-264e-4b86-b655-903ec4f4ef2e] Inference time: 11.52 seconds, CPU usage: 42.0%, CPU core utilization: [24.5, 25.9, 25.9, 91.7] +2025-08-21 01:36:34 - INFO - [5425fe3f-264e-4b86-b655-903ec4f4ef2e] Cleaned up temporary frame directory: temp_videos/5425fe3f-264e-4b86-b655-903ec4f4ef2e diff --git a/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_003308.log b/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_003308.log new file mode 100644 index 0000000000000000000000000000000000000000..d616c3ed3a5a34ec9fbab75de8b5a6e8f88399d0 --- /dev/null +++ b/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_003308.log @@ -0,0 +1,2 @@ +2025-08-21 00:33:08 - INFO - Loading model: Qwen/Qwen2.5-VL-3B-Instruct-AWQ +2025-08-21 00:33:56 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). diff --git a/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_003548.log b/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_003548.log new file mode 100644 index 0000000000000000000000000000000000000000..b7172f3df895309eaa6daaac4512ae7d564eae4c --- /dev/null +++ b/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_003548.log @@ -0,0 +1,2 @@ +2025-08-21 00:35:48 - INFO - Loading model: Qwen/Qwen2.5-VL-3B-Instruct-AWQ +2025-08-21 00:35:51 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). 
diff --git a/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_003740.log b/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_003740.log new file mode 100644 index 0000000000000000000000000000000000000000..d4e43aa45d037d3fa4a647b6a5c8c79db5b1bf57 --- /dev/null +++ b/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_003740.log @@ -0,0 +1,10 @@ +2025-08-21 00:37:40 - INFO - Loading model: Qwen/Qwen2.5-VL-3B-Instruct-AWQ +2025-08-21 00:37:42 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-21 00:37:58 - INFO - Model loaded in 17.64 seconds +2025-08-21 00:37:58 - INFO - GPU Memory Usage after model load: 3250.85 MB +2025-08-21 00:39:14 - INFO - [7b3e4c2f-150e-4db3-a2b2-792ef836f5c3] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_001.mp4' +2025-08-21 00:39:14 - INFO - [7b3e4c2f-150e-4db3-a2b2-792ef836f5c3] Video saved to temporary file: temp_videos/7b3e4c2f-150e-4db3-a2b2-792ef836f5c3.mp4 +2025-08-21 00:39:14 - INFO - [7b3e4c2f-150e-4db3-a2b2-792ef836f5c3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:39:18 - INFO - [7b3e4c2f-150e-4db3-a2b2-792ef836f5c3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:39:18 - INFO - [7b3e4c2f-150e-4db3-a2b2-792ef836f5c3] 30 frames saved to temp_videos/7b3e4c2f-150e-4db3-a2b2-792ef836f5c3 +2025-08-21 00:39:19 - INFO - Prompt token length: 2306 diff --git a/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_004253.log b/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_004253.log new file mode 100644 index 0000000000000000000000000000000000000000..5845c022600fc98d2af574a94de1b5fbe42924e8 --- /dev/null +++ b/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_004253.log @@ -0,0 +1,103 @@ +2025-08-21 00:42:53 - INFO - Loading model: Qwen/Qwen2.5-VL-3B-Instruct-AWQ +2025-08-21 00:42:56 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-21 00:43:05 - INFO - Model loaded in 11.91 seconds +2025-08-21 00:43:05 - INFO - GPU Memory Usage after model load: 3250.55 MB +2025-08-21 00:44:34 - INFO - [85d08818-6d68-43fa-a772-626d83ea5d11] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_001.mp4' +2025-08-21 00:44:34 - INFO - [85d08818-6d68-43fa-a772-626d83ea5d11] Video saved to temporary file: temp_videos/85d08818-6d68-43fa-a772-626d83ea5d11.mp4 +2025-08-21 00:44:34 - INFO - [85d08818-6d68-43fa-a772-626d83ea5d11] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:44:41 - INFO - [85d08818-6d68-43fa-a772-626d83ea5d11] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:44:41 - INFO - [85d08818-6d68-43fa-a772-626d83ea5d11] 30 frames saved to temp_videos/85d08818-6d68-43fa-a772-626d83ea5d11 +2025-08-21 00:44:41 - INFO - Prompt token length: 2306 +2025-08-21 00:44:47 - INFO - Tokens per second: 11.896679103804114, Peak GPU memory MB: 5348.375 +2025-08-21 00:44:47 - INFO - [85d08818-6d68-43fa-a772-626d83ea5d11] Inference time: 12.73 seconds, CPU usage: 20.1%, CPU core utilization: [17.7, 19.0, 21.6, 22.0] +2025-08-21 00:44:47 - INFO - [85d08818-6d68-43fa-a772-626d83ea5d11] Cleaned up temporary frame directory: temp_videos/85d08818-6d68-43fa-a772-626d83ea5d11 +2025-08-21 00:44:47 - INFO - [6f9278db-56d7-44a9-b7f0-7200571a0979] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_002.mp4' +2025-08-21 00:44:47 - INFO - [6f9278db-56d7-44a9-b7f0-7200571a0979] Video saved to temporary file: temp_videos/6f9278db-56d7-44a9-b7f0-7200571a0979.mp4 +2025-08-21 00:44:47 - INFO - [6f9278db-56d7-44a9-b7f0-7200571a0979] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:44:52 - INFO - [6f9278db-56d7-44a9-b7f0-7200571a0979] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:44:52 - INFO - [6f9278db-56d7-44a9-b7f0-7200571a0979] 30 frames saved to temp_videos/6f9278db-56d7-44a9-b7f0-7200571a0979 +2025-08-21 00:44:52 - INFO - Prompt token length: 2306 +2025-08-21 00:44:57 - INFO - Tokens per second: 12.02869428489415, Peak GPU memory MB: 5348.375 +2025-08-21 00:44:57 - INFO - [6f9278db-56d7-44a9-b7f0-7200571a0979] Inference time: 10.58 seconds, CPU usage: 55.1%, CPU core utilization: [42.1, 41.3, 93.8, 43.2] +2025-08-21 00:44:57 - INFO - [6f9278db-56d7-44a9-b7f0-7200571a0979] Cleaned up temporary frame directory: temp_videos/6f9278db-56d7-44a9-b7f0-7200571a0979 +2025-08-21 00:44:57 - INFO - [2835a505-ec18-45e1-9b43-393c4eb0c79a] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_003.mp4' +2025-08-21 00:44:57 - INFO - [2835a505-ec18-45e1-9b43-393c4eb0c79a] Video saved to temporary file: temp_videos/2835a505-ec18-45e1-9b43-393c4eb0c79a.mp4 +2025-08-21 00:44:57 - INFO - [2835a505-ec18-45e1-9b43-393c4eb0c79a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:45:02 - INFO - [2835a505-ec18-45e1-9b43-393c4eb0c79a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:45:02 - INFO - [2835a505-ec18-45e1-9b43-393c4eb0c79a] 30 frames saved to temp_videos/2835a505-ec18-45e1-9b43-393c4eb0c79a +2025-08-21 00:45:02 - INFO - Prompt token length: 2306 +2025-08-21 00:45:08 - INFO - Tokens per second: 11.82593643667435, Peak GPU memory MB: 5348.375 +2025-08-21 00:45:08 - INFO - [2835a505-ec18-45e1-9b43-393c4eb0c79a] Inference time: 10.16 seconds, CPU usage: 56.2%, CPU core utilization: [90.9, 44.3, 46.7, 42.8] +2025-08-21 00:45:08 - INFO - [2835a505-ec18-45e1-9b43-393c4eb0c79a] Cleaned up temporary frame directory: temp_videos/2835a505-ec18-45e1-9b43-393c4eb0c79a +2025-08-21 00:45:08 - INFO - [9ad1595c-b1c3-409e-99bb-050a41cf9e9e] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_004.mp4' +2025-08-21 00:45:08 - INFO - [9ad1595c-b1c3-409e-99bb-050a41cf9e9e] Video saved to temporary file: temp_videos/9ad1595c-b1c3-409e-99bb-050a41cf9e9e.mp4 +2025-08-21 00:45:08 - INFO - [9ad1595c-b1c3-409e-99bb-050a41cf9e9e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:45:13 - INFO - [9ad1595c-b1c3-409e-99bb-050a41cf9e9e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:45:13 - INFO - [9ad1595c-b1c3-409e-99bb-050a41cf9e9e] 30 frames saved to temp_videos/9ad1595c-b1c3-409e-99bb-050a41cf9e9e +2025-08-21 00:45:13 - INFO - Prompt token length: 2306 +2025-08-21 00:45:19 - INFO - Tokens per second: 11.785621023429538, Peak GPU memory MB: 5348.375 +2025-08-21 00:45:19 - INFO - [9ad1595c-b1c3-409e-99bb-050a41cf9e9e] Inference time: 11.90 seconds, CPU usage: 53.0%, CPU core utilization: [38.8, 90.1, 41.1, 42.3] +2025-08-21 00:45:19 - INFO - [9ad1595c-b1c3-409e-99bb-050a41cf9e9e] Cleaned up temporary frame directory: temp_videos/9ad1595c-b1c3-409e-99bb-050a41cf9e9e +2025-08-21 00:45:19 - INFO - [83ee3b32-7870-4d00-b3f0-d1ec1167d45e] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_005.mp4' +2025-08-21 00:45:19 - INFO - [83ee3b32-7870-4d00-b3f0-d1ec1167d45e] Video saved to temporary file: temp_videos/83ee3b32-7870-4d00-b3f0-d1ec1167d45e.mp4 +2025-08-21 00:45:19 - INFO - [83ee3b32-7870-4d00-b3f0-d1ec1167d45e] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:45:24 - INFO - [83ee3b32-7870-4d00-b3f0-d1ec1167d45e] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:45:24 - INFO - [83ee3b32-7870-4d00-b3f0-d1ec1167d45e] 30 frames saved to temp_videos/83ee3b32-7870-4d00-b3f0-d1ec1167d45e +2025-08-21 00:45:25 - INFO - Prompt token length: 2306 +2025-08-21 00:45:32 - INFO - Tokens per second: 9.017638706034026, Peak GPU memory MB: 5348.375 +2025-08-21 00:45:32 - INFO - [83ee3b32-7870-4d00-b3f0-d1ec1167d45e] Inference time: 12.17 seconds, CPU usage: 75.1%, CPU core utilization: [69.4, 92.0, 68.0, 70.7] +2025-08-21 00:45:32 - INFO - [83ee3b32-7870-4d00-b3f0-d1ec1167d45e] Cleaned up temporary frame directory: temp_videos/83ee3b32-7870-4d00-b3f0-d1ec1167d45e +2025-08-21 00:45:50 - INFO - [91458b58-07b8-4a0e-bbec-63fde300aebc] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_001.mp4' +2025-08-21 00:45:50 - INFO - [91458b58-07b8-4a0e-bbec-63fde300aebc] Video saved to temporary file: temp_videos/91458b58-07b8-4a0e-bbec-63fde300aebc.mp4 +2025-08-21 00:45:50 - INFO - [91458b58-07b8-4a0e-bbec-63fde300aebc] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:45:57 - INFO - [91458b58-07b8-4a0e-bbec-63fde300aebc] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:45:57 - INFO - [91458b58-07b8-4a0e-bbec-63fde300aebc] 30 frames saved to temp_videos/91458b58-07b8-4a0e-bbec-63fde300aebc +2025-08-21 00:45:57 - INFO - Prompt token length: 2296 +2025-08-21 00:46:18 - INFO - Tokens per second: 11.854063880552362, Peak GPU memory MB: 5348.375 +2025-08-21 00:46:18 - INFO - [91458b58-07b8-4a0e-bbec-63fde300aebc] Inference time: 28.38 seconds, CPU usage: 43.3%, CPU core utilization: [34.4, 74.4, 32.5, 31.9] +2025-08-21 00:46:18 - INFO - [91458b58-07b8-4a0e-bbec-63fde300aebc] Cleaned up temporary frame directory: temp_videos/91458b58-07b8-4a0e-bbec-63fde300aebc +2025-08-21 00:46:18 - INFO - [65b42141-20bf-4cf1-92b1-f29d846146ab] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_002.mp4' +2025-08-21 00:46:18 - INFO - [65b42141-20bf-4cf1-92b1-f29d846146ab] Video saved to temporary file: temp_videos/65b42141-20bf-4cf1-92b1-f29d846146ab.mp4 +2025-08-21 00:46:18 - INFO - [65b42141-20bf-4cf1-92b1-f29d846146ab] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:46:23 - INFO - [65b42141-20bf-4cf1-92b1-f29d846146ab] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:46:23 - INFO - [65b42141-20bf-4cf1-92b1-f29d846146ab] 30 frames saved to temp_videos/65b42141-20bf-4cf1-92b1-f29d846146ab +2025-08-21 00:46:23 - INFO - Prompt token length: 2296 +2025-08-21 00:46:47 - INFO - Tokens per second: 11.997021386458192, Peak GPU memory MB: 5348.375 +2025-08-21 00:46:47 - INFO - [65b42141-20bf-4cf1-92b1-f29d846146ab] Inference time: 29.08 seconds, CPU usage: 37.0%, CPU core utilization: [33.9, 16.1, 80.3, 17.7] +2025-08-21 00:46:47 - INFO - [65b42141-20bf-4cf1-92b1-f29d846146ab] Cleaned up temporary frame directory: temp_videos/65b42141-20bf-4cf1-92b1-f29d846146ab +2025-08-21 00:46:47 - INFO - [2ff4de72-4fa0-4759-9211-626a4f60c683] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_003.mp4' +2025-08-21 00:46:47 - INFO - [2ff4de72-4fa0-4759-9211-626a4f60c683] Video saved to temporary file: temp_videos/2ff4de72-4fa0-4759-9211-626a4f60c683.mp4 +2025-08-21 00:46:47 - INFO - [2ff4de72-4fa0-4759-9211-626a4f60c683] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:46:52 - INFO - [2ff4de72-4fa0-4759-9211-626a4f60c683] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:46:52 - INFO - [2ff4de72-4fa0-4759-9211-626a4f60c683] 30 frames saved to temp_videos/2ff4de72-4fa0-4759-9211-626a4f60c683 +2025-08-21 00:46:52 - INFO - Prompt token length: 2296 +2025-08-21 00:47:16 - INFO - Tokens per second: 12.037390307990146, Peak GPU memory MB: 5348.375 +2025-08-21 00:47:16 - INFO - [2ff4de72-4fa0-4759-9211-626a4f60c683] Inference time: 29.02 seconds, CPU usage: 37.2%, CPU core utilization: [48.4, 16.9, 65.1, 18.1] +2025-08-21 00:47:16 - INFO - [2ff4de72-4fa0-4759-9211-626a4f60c683] Cleaned up temporary frame directory: temp_videos/2ff4de72-4fa0-4759-9211-626a4f60c683 +2025-08-21 00:47:16 - INFO - [68a0b698-fcf0-4e8b-b0cb-e03797f97561] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_004.mp4' +2025-08-21 00:47:16 - INFO - [68a0b698-fcf0-4e8b-b0cb-e03797f97561] Video saved to temporary file: temp_videos/68a0b698-fcf0-4e8b-b0cb-e03797f97561.mp4 +2025-08-21 00:47:16 - INFO - [68a0b698-fcf0-4e8b-b0cb-e03797f97561] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:47:21 - INFO - [68a0b698-fcf0-4e8b-b0cb-e03797f97561] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:47:21 - INFO - [68a0b698-fcf0-4e8b-b0cb-e03797f97561] 30 frames saved to temp_videos/68a0b698-fcf0-4e8b-b0cb-e03797f97561 +2025-08-21 00:47:21 - INFO - Prompt token length: 2296 +2025-08-21 00:47:45 - INFO - Tokens per second: 12.027123899562989, Peak GPU memory MB: 5348.375 +2025-08-21 00:47:45 - INFO - [68a0b698-fcf0-4e8b-b0cb-e03797f97561] Inference time: 29.08 seconds, CPU usage: 36.9%, CPU core utilization: [74.2, 17.8, 15.5, 40.0] +2025-08-21 00:47:45 - INFO - [68a0b698-fcf0-4e8b-b0cb-e03797f97561] Cleaned up temporary frame directory: temp_videos/68a0b698-fcf0-4e8b-b0cb-e03797f97561 +2025-08-21 00:47:45 - INFO - [6b5a1c52-b835-40af-b34e-b1b24b36ca95] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_005.mp4' +2025-08-21 00:47:45 - INFO - [6b5a1c52-b835-40af-b34e-b1b24b36ca95] Video saved to temporary file: temp_videos/6b5a1c52-b835-40af-b34e-b1b24b36ca95.mp4 +2025-08-21 00:47:45 - INFO - [6b5a1c52-b835-40af-b34e-b1b24b36ca95] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:47:50 - INFO - [6b5a1c52-b835-40af-b34e-b1b24b36ca95] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:47:50 - INFO - [6b5a1c52-b835-40af-b34e-b1b24b36ca95] 30 frames saved to temp_videos/6b5a1c52-b835-40af-b34e-b1b24b36ca95 +2025-08-21 00:47:50 - INFO - Prompt token length: 2296 +2025-08-21 00:48:07 - INFO - Tokens per second: 11.998806395924422, Peak GPU memory MB: 5348.375 +2025-08-21 00:48:07 - INFO - [6b5a1c52-b835-40af-b34e-b1b24b36ca95] Inference time: 21.52 seconds, CPU usage: 40.1%, CPU core utilization: [93.9, 22.7, 21.5, 22.1] +2025-08-21 00:48:07 - INFO - [6b5a1c52-b835-40af-b34e-b1b24b36ca95] Cleaned up temporary frame directory: temp_videos/6b5a1c52-b835-40af-b34e-b1b24b36ca95 +2025-08-21 00:48:07 - INFO - [f6c17199-243f-477c-8b92-175e7d81c801] Received new video inference request. Prompt: 'Please describe the video in detail, only focus on customer and staff behavior and activities and do not overly describe the static scene.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_006.mp4' +2025-08-21 00:48:07 - INFO - [f6c17199-243f-477c-8b92-175e7d81c801] Video saved to temporary file: temp_videos/f6c17199-243f-477c-8b92-175e7d81c801.mp4 +2025-08-21 00:48:07 - INFO - [f6c17199-243f-477c-8b92-175e7d81c801] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:48:12 - INFO - [f6c17199-243f-477c-8b92-175e7d81c801] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:48:12 - INFO - [f6c17199-243f-477c-8b92-175e7d81c801] 30 frames saved to temp_videos/f6c17199-243f-477c-8b92-175e7d81c801 +2025-08-21 00:48:12 - INFO - Prompt token length: 2296 +2025-08-21 00:48:36 - INFO - Tokens per second: 12.045229786817497, Peak GPU memory MB: 5348.375 +2025-08-21 00:48:36 - INFO - [f6c17199-243f-477c-8b92-175e7d81c801] Inference time: 29.18 seconds, CPU usage: 37.2%, CPU core utilization: [44.5, 28.8, 58.7, 16.6] +2025-08-21 00:48:36 - INFO - [f6c17199-243f-477c-8b92-175e7d81c801] Cleaned up temporary frame directory: temp_videos/f6c17199-243f-477c-8b92-175e7d81c801 diff --git a/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_004907.log b/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_004907.log new file mode 100644 index 0000000000000000000000000000000000000000..00e9ef32dea2b413c7b35fdfbe20f4e31e1fd9aa --- /dev/null +++ b/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_004907.log @@ -0,0 +1,145 @@ +2025-08-21 00:49:07 - INFO - Loading model: Qwen/Qwen2.5-VL-3B-Instruct-AWQ +2025-08-21 00:49:11 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-21 00:49:19 - INFO - Model loaded in 11.79 seconds +2025-08-21 00:49:19 - INFO - GPU Memory Usage after model load: 3250.55 MB +2025-08-21 00:50:53 - INFO - [30fe7962-43c7-418e-9663-3cf53776c810] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. 
Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_001.mp4' +2025-08-21 00:50:53 - INFO - [30fe7962-43c7-418e-9663-3cf53776c810] Video saved to temporary file: temp_videos/30fe7962-43c7-418e-9663-3cf53776c810.mp4 +2025-08-21 00:50:53 - INFO - [30fe7962-43c7-418e-9663-3cf53776c810] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:50:58 - INFO - [30fe7962-43c7-418e-9663-3cf53776c810] Extracted 30 frames successfully. Saving to temporary files... +2025-08-21 00:50:58 - INFO - [30fe7962-43c7-418e-9663-3cf53776c810] 30 frames saved to temp_videos/30fe7962-43c7-418e-9663-3cf53776c810 +2025-08-21 00:50:58 - INFO - Prompt token length: 2306 +2025-08-21 00:51:05 - INFO - Tokens per second: 11.82581726573877, Peak GPU memory MB: 5348.375 +2025-08-21 00:51:05 - INFO - [30fe7962-43c7-418e-9663-3cf53776c810] Inference time: 11.48 seconds, CPU usage: 19.9%, CPU core utilization: [17.8, 19.9, 21.8, 20.2] +2025-08-21 00:51:05 - INFO - [30fe7962-43c7-418e-9663-3cf53776c810] Cleaned up temporary frame directory: temp_videos/30fe7962-43c7-418e-9663-3cf53776c810 +2025-08-21 00:51:05 - INFO - [a3af8f29-02fa-49b6-bcb7-c671f274c93a] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_002.mp4' +2025-08-21 00:51:05 - INFO - [a3af8f29-02fa-49b6-bcb7-c671f274c93a] Video saved to temporary file: temp_videos/a3af8f29-02fa-49b6-bcb7-c671f274c93a.mp4 +2025-08-21 00:51:05 - INFO - [a3af8f29-02fa-49b6-bcb7-c671f274c93a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:51:09 - INFO - [a3af8f29-02fa-49b6-bcb7-c671f274c93a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:51:09 - INFO - [a3af8f29-02fa-49b6-bcb7-c671f274c93a] 30 frames saved to temp_videos/a3af8f29-02fa-49b6-bcb7-c671f274c93a +2025-08-21 00:51:10 - INFO - Prompt token length: 2306 +2025-08-21 00:51:15 - INFO - Tokens per second: 11.74072921403584, Peak GPU memory MB: 5348.375 +2025-08-21 00:51:15 - INFO - [a3af8f29-02fa-49b6-bcb7-c671f274c93a] Inference time: 10.66 seconds, CPU usage: 56.2%, CPU core utilization: [43.8, 43.4, 43.8, 93.4] +2025-08-21 00:51:15 - INFO - [a3af8f29-02fa-49b6-bcb7-c671f274c93a] Cleaned up temporary frame directory: temp_videos/a3af8f29-02fa-49b6-bcb7-c671f274c93a +2025-08-21 00:51:15 - INFO - [be2cf942-7b83-46a1-80f4-3341fc34fdda] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_003.mp4' +2025-08-21 00:51:15 - INFO - [be2cf942-7b83-46a1-80f4-3341fc34fdda] Video saved to temporary file: temp_videos/be2cf942-7b83-46a1-80f4-3341fc34fdda.mp4 +2025-08-21 00:51:15 - INFO - [be2cf942-7b83-46a1-80f4-3341fc34fdda] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:51:20 - INFO - [be2cf942-7b83-46a1-80f4-3341fc34fdda] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:51:20 - INFO - [be2cf942-7b83-46a1-80f4-3341fc34fdda] 30 frames saved to temp_videos/be2cf942-7b83-46a1-80f4-3341fc34fdda +2025-08-21 00:51:20 - INFO - Prompt token length: 2306 +2025-08-21 00:51:27 - INFO - Tokens per second: 11.73304837389127, Peak GPU memory MB: 5348.375 +2025-08-21 00:51:27 - INFO - [be2cf942-7b83-46a1-80f4-3341fc34fdda] Inference time: 11.55 seconds, CPU usage: 52.1%, CPU core utilization: [38.3, 93.8, 38.3, 38.0] +2025-08-21 00:51:27 - INFO - [be2cf942-7b83-46a1-80f4-3341fc34fdda] Cleaned up temporary frame directory: temp_videos/be2cf942-7b83-46a1-80f4-3341fc34fdda +2025-08-21 00:51:27 - INFO - [1d4fb530-0fc9-438f-a51a-cabce02b6cb7] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_004.mp4' +2025-08-21 00:51:27 - INFO - [1d4fb530-0fc9-438f-a51a-cabce02b6cb7] Video saved to temporary file: temp_videos/1d4fb530-0fc9-438f-a51a-cabce02b6cb7.mp4 +2025-08-21 00:51:27 - INFO - [1d4fb530-0fc9-438f-a51a-cabce02b6cb7] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:51:32 - INFO - [1d4fb530-0fc9-438f-a51a-cabce02b6cb7] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:51:32 - INFO - [1d4fb530-0fc9-438f-a51a-cabce02b6cb7] 30 frames saved to temp_videos/1d4fb530-0fc9-438f-a51a-cabce02b6cb7 +2025-08-21 00:51:32 - INFO - Prompt token length: 2306 +2025-08-21 00:51:38 - INFO - Tokens per second: 11.929480284506932, Peak GPU memory MB: 5348.375 +2025-08-21 00:51:38 - INFO - [1d4fb530-0fc9-438f-a51a-cabce02b6cb7] Inference time: 11.57 seconds, CPU usage: 52.3%, CPU core utilization: [89.2, 38.6, 42.9, 38.3] +2025-08-21 00:51:38 - INFO - [1d4fb530-0fc9-438f-a51a-cabce02b6cb7] Cleaned up temporary frame directory: temp_videos/1d4fb530-0fc9-438f-a51a-cabce02b6cb7 +2025-08-21 00:51:38 - INFO - [ce11e297-0569-49ac-85bc-050e43e84448] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_005.mp4' +2025-08-21 00:51:38 - INFO - [ce11e297-0569-49ac-85bc-050e43e84448] Video saved to temporary file: temp_videos/ce11e297-0569-49ac-85bc-050e43e84448.mp4 +2025-08-21 00:51:38 - INFO - [ce11e297-0569-49ac-85bc-050e43e84448] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:51:43 - INFO - [ce11e297-0569-49ac-85bc-050e43e84448] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:51:43 - INFO - [ce11e297-0569-49ac-85bc-050e43e84448] 30 frames saved to temp_videos/ce11e297-0569-49ac-85bc-050e43e84448 +2025-08-21 00:51:43 - INFO - Prompt token length: 2306 +2025-08-21 00:51:50 - INFO - Tokens per second: 11.89941941740383, Peak GPU memory MB: 5348.375 +2025-08-21 00:51:50 - INFO - [ce11e297-0569-49ac-85bc-050e43e84448] Inference time: 11.12 seconds, CPU usage: 53.2%, CPU core utilization: [37.8, 40.6, 93.6, 40.6] +2025-08-21 00:51:50 - INFO - [ce11e297-0569-49ac-85bc-050e43e84448] Cleaned up temporary frame directory: temp_videos/ce11e297-0569-49ac-85bc-050e43e84448 +2025-08-21 00:51:50 - INFO - [5cec6dd5-3430-473f-aa6a-0d81b6475f34] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_006.mp4' +2025-08-21 00:51:50 - INFO - [5cec6dd5-3430-473f-aa6a-0d81b6475f34] Video saved to temporary file: temp_videos/5cec6dd5-3430-473f-aa6a-0d81b6475f34.mp4 +2025-08-21 00:51:50 - INFO - [5cec6dd5-3430-473f-aa6a-0d81b6475f34] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:51:54 - INFO - [5cec6dd5-3430-473f-aa6a-0d81b6475f34] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:51:54 - INFO - [5cec6dd5-3430-473f-aa6a-0d81b6475f34] 30 frames saved to temp_videos/5cec6dd5-3430-473f-aa6a-0d81b6475f34 +2025-08-21 00:51:55 - INFO - Prompt token length: 2306 +2025-08-21 00:52:00 - INFO - Tokens per second: 11.881699260124632, Peak GPU memory MB: 5348.375 +2025-08-21 00:52:00 - INFO - [5cec6dd5-3430-473f-aa6a-0d81b6475f34] Inference time: 10.77 seconds, CPU usage: 53.6%, CPU core utilization: [59.2, 66.8, 47.9, 40.4] +2025-08-21 00:52:00 - INFO - [5cec6dd5-3430-473f-aa6a-0d81b6475f34] Cleaned up temporary frame directory: temp_videos/5cec6dd5-3430-473f-aa6a-0d81b6475f34 +2025-08-21 00:52:21 - INFO - [70289040-01c3-4ed8-83de-5a2d9996ed2d] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_001.mp4' +2025-08-21 00:52:21 - INFO - [70289040-01c3-4ed8-83de-5a2d9996ed2d] Video saved to temporary file: temp_videos/70289040-01c3-4ed8-83de-5a2d9996ed2d.mp4 +2025-08-21 00:52:21 - INFO - [70289040-01c3-4ed8-83de-5a2d9996ed2d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:52:26 - INFO - [70289040-01c3-4ed8-83de-5a2d9996ed2d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:52:26 - INFO - [70289040-01c3-4ed8-83de-5a2d9996ed2d] 30 frames saved to temp_videos/70289040-01c3-4ed8-83de-5a2d9996ed2d +2025-08-21 00:52:26 - INFO - Prompt token length: 2305 +2025-08-21 00:52:32 - INFO - Tokens per second: 11.87194334609885, Peak GPU memory MB: 5348.375 +2025-08-21 00:52:32 - INFO - [70289040-01c3-4ed8-83de-5a2d9996ed2d] Inference time: 10.96 seconds, CPU usage: 20.3%, CPU core utilization: [15.5, 15.3, 33.8, 16.7] +2025-08-21 00:52:32 - INFO - [70289040-01c3-4ed8-83de-5a2d9996ed2d] Cleaned up temporary frame directory: temp_videos/70289040-01c3-4ed8-83de-5a2d9996ed2d +2025-08-21 00:52:32 - INFO - [a8bb150b-138f-4300-adf1-fa15dbace647] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_002.mp4' +2025-08-21 00:52:32 - INFO - [a8bb150b-138f-4300-adf1-fa15dbace647] Video saved to temporary file: temp_videos/a8bb150b-138f-4300-adf1-fa15dbace647.mp4 +2025-08-21 00:52:32 - INFO - [a8bb150b-138f-4300-adf1-fa15dbace647] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:52:37 - INFO - [a8bb150b-138f-4300-adf1-fa15dbace647] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:52:37 - INFO - [a8bb150b-138f-4300-adf1-fa15dbace647] 30 frames saved to temp_videos/a8bb150b-138f-4300-adf1-fa15dbace647 +2025-08-21 00:52:37 - INFO - Prompt token length: 2305 +2025-08-21 00:52:44 - INFO - Tokens per second: 11.83286096302887, Peak GPU memory MB: 5348.375 +2025-08-21 00:52:44 - INFO - [a8bb150b-138f-4300-adf1-fa15dbace647] Inference time: 11.96 seconds, CPU usage: 52.1%, CPU core utilization: [39.5, 38.2, 37.0, 93.5] +2025-08-21 00:52:44 - INFO - [a8bb150b-138f-4300-adf1-fa15dbace647] Cleaned up temporary frame directory: temp_videos/a8bb150b-138f-4300-adf1-fa15dbace647 +2025-08-21 00:52:44 - INFO - [6f5c9723-cae6-47ab-8cc7-6942f1ad38d4] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_003.mp4' +2025-08-21 00:52:44 - INFO - [6f5c9723-cae6-47ab-8cc7-6942f1ad38d4] Video saved to temporary file: temp_videos/6f5c9723-cae6-47ab-8cc7-6942f1ad38d4.mp4 +2025-08-21 00:52:44 - INFO - [6f5c9723-cae6-47ab-8cc7-6942f1ad38d4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:52:49 - INFO - [6f5c9723-cae6-47ab-8cc7-6942f1ad38d4] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:52:49 - INFO - [6f5c9723-cae6-47ab-8cc7-6942f1ad38d4] 30 frames saved to temp_videos/6f5c9723-cae6-47ab-8cc7-6942f1ad38d4 +2025-08-21 00:52:49 - INFO - Prompt token length: 2305 +2025-08-21 00:52:56 - INFO - Tokens per second: 11.760994323642526, Peak GPU memory MB: 5348.375 +2025-08-21 00:52:56 - INFO - [6f5c9723-cae6-47ab-8cc7-6942f1ad38d4] Inference time: 11.29 seconds, CPU usage: 53.3%, CPU core utilization: [40.3, 75.1, 39.1, 58.8] +2025-08-21 00:52:56 - INFO - [6f5c9723-cae6-47ab-8cc7-6942f1ad38d4] Cleaned up temporary frame directory: temp_videos/6f5c9723-cae6-47ab-8cc7-6942f1ad38d4 +2025-08-21 00:52:56 - INFO - [0d1e6fde-165d-4649-8878-c71f32a33f71] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_004.mp4' +2025-08-21 00:52:56 - INFO - [0d1e6fde-165d-4649-8878-c71f32a33f71] Video saved to temporary file: temp_videos/0d1e6fde-165d-4649-8878-c71f32a33f71.mp4 +2025-08-21 00:52:56 - INFO - [0d1e6fde-165d-4649-8878-c71f32a33f71] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:53:00 - INFO - [0d1e6fde-165d-4649-8878-c71f32a33f71] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:53:01 - INFO - [0d1e6fde-165d-4649-8878-c71f32a33f71] 30 frames saved to temp_videos/0d1e6fde-165d-4649-8878-c71f32a33f71 +2025-08-21 00:53:01 - INFO - Prompt token length: 2305 +2025-08-21 00:53:07 - INFO - Tokens per second: 11.888364051217732, Peak GPU memory MB: 5348.375 +2025-08-21 00:53:07 - INFO - [0d1e6fde-165d-4649-8878-c71f32a33f71] Inference time: 11.70 seconds, CPU usage: 51.6%, CPU core utilization: [59.1, 67.7, 41.2, 38.5] +2025-08-21 00:53:07 - INFO - [0d1e6fde-165d-4649-8878-c71f32a33f71] Cleaned up temporary frame directory: temp_videos/0d1e6fde-165d-4649-8878-c71f32a33f71 +2025-08-21 00:53:07 - INFO - [884171ad-eeda-4dd5-9f1b-d43868fa7804] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_005.mp4' +2025-08-21 00:53:07 - INFO - [884171ad-eeda-4dd5-9f1b-d43868fa7804] Video saved to temporary file: temp_videos/884171ad-eeda-4dd5-9f1b-d43868fa7804.mp4 +2025-08-21 00:53:07 - INFO - [884171ad-eeda-4dd5-9f1b-d43868fa7804] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:53:12 - INFO - [884171ad-eeda-4dd5-9f1b-d43868fa7804] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:53:12 - INFO - [884171ad-eeda-4dd5-9f1b-d43868fa7804] 30 frames saved to temp_videos/884171ad-eeda-4dd5-9f1b-d43868fa7804 +2025-08-21 00:53:12 - INFO - Prompt token length: 2305 +2025-08-21 00:53:18 - INFO - Tokens per second: 11.790592742669173, Peak GPU memory MB: 5348.375 +2025-08-21 00:53:18 - INFO - [884171ad-eeda-4dd5-9f1b-d43868fa7804] Inference time: 10.74 seconds, CPU usage: 55.3%, CPU core utilization: [53.6, 43.0, 81.8, 42.7] +2025-08-21 00:53:18 - INFO - [884171ad-eeda-4dd5-9f1b-d43868fa7804] Cleaned up temporary frame directory: temp_videos/884171ad-eeda-4dd5-9f1b-d43868fa7804 +2025-08-21 00:53:18 - INFO - [ce71041b-d894-486d-9e37-8b9a86705705] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_006.mp4' +2025-08-21 00:53:18 - INFO - [ce71041b-d894-486d-9e37-8b9a86705705] Video saved to temporary file: temp_videos/ce71041b-d894-486d-9e37-8b9a86705705.mp4 +2025-08-21 00:53:18 - INFO - [ce71041b-d894-486d-9e37-8b9a86705705] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:53:23 - INFO - [ce71041b-d894-486d-9e37-8b9a86705705] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:53:23 - INFO - [ce71041b-d894-486d-9e37-8b9a86705705] 30 frames saved to temp_videos/ce71041b-d894-486d-9e37-8b9a86705705 +2025-08-21 00:53:23 - INFO - Prompt token length: 2305 +2025-08-21 00:53:28 - INFO - Tokens per second: 11.854764118772119, Peak GPU memory MB: 5348.375 +2025-08-21 00:53:28 - INFO - [ce71041b-d894-486d-9e37-8b9a86705705] Inference time: 10.06 seconds, CPU usage: 55.9%, CPU core utilization: [48.9, 44.4, 87.7, 42.4] +2025-08-21 00:53:28 - INFO - [ce71041b-d894-486d-9e37-8b9a86705705] Cleaned up temporary frame directory: temp_videos/ce71041b-d894-486d-9e37-8b9a86705705 +2025-08-21 00:53:28 - INFO - [a135f93a-1ac9-4578-a1c4-2b1aeb89afda] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_007.mp4' +2025-08-21 00:53:28 - INFO - [a135f93a-1ac9-4578-a1c4-2b1aeb89afda] Video saved to temporary file: temp_videos/a135f93a-1ac9-4578-a1c4-2b1aeb89afda.mp4 +2025-08-21 00:53:28 - INFO - [a135f93a-1ac9-4578-a1c4-2b1aeb89afda] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:53:33 - INFO - [a135f93a-1ac9-4578-a1c4-2b1aeb89afda] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:53:33 - INFO - [a135f93a-1ac9-4578-a1c4-2b1aeb89afda] 30 frames saved to temp_videos/a135f93a-1ac9-4578-a1c4-2b1aeb89afda +2025-08-21 00:53:33 - INFO - Prompt token length: 2305 +2025-08-21 00:53:40 - INFO - Tokens per second: 11.806017274209756, Peak GPU memory MB: 5348.375 +2025-08-21 00:53:40 - INFO - [a135f93a-1ac9-4578-a1c4-2b1aeb89afda] Inference time: 12.00 seconds, CPU usage: 51.4%, CPU core utilization: [49.4, 37.7, 81.8, 36.8] +2025-08-21 00:53:40 - INFO - [a135f93a-1ac9-4578-a1c4-2b1aeb89afda] Cleaned up temporary frame directory: temp_videos/a135f93a-1ac9-4578-a1c4-2b1aeb89afda +2025-08-21 00:53:40 - INFO - [6873e58b-3473-4224-909d-3159c03588e5] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_008.mp4' +2025-08-21 00:53:40 - INFO - [6873e58b-3473-4224-909d-3159c03588e5] Video saved to temporary file: temp_videos/6873e58b-3473-4224-909d-3159c03588e5.mp4 +2025-08-21 00:53:40 - INFO - [6873e58b-3473-4224-909d-3159c03588e5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:53:45 - INFO - [6873e58b-3473-4224-909d-3159c03588e5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:53:45 - INFO - [6873e58b-3473-4224-909d-3159c03588e5] 30 frames saved to temp_videos/6873e58b-3473-4224-909d-3159c03588e5 +2025-08-21 00:53:45 - INFO - Prompt token length: 2305 +2025-08-21 00:53:51 - INFO - Tokens per second: 11.878890265234213, Peak GPU memory MB: 5348.375 +2025-08-21 00:53:51 - INFO - [6873e58b-3473-4224-909d-3159c03588e5] Inference time: 10.61 seconds, CPU usage: 55.0%, CPU core utilization: [90.1, 42.2, 45.8, 41.8] +2025-08-21 00:53:51 - INFO - [6873e58b-3473-4224-909d-3159c03588e5] Cleaned up temporary frame directory: temp_videos/6873e58b-3473-4224-909d-3159c03588e5 +2025-08-21 00:53:51 - INFO - [6356c394-1484-4391-b145-81215ba47ee8] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_009.mp4' +2025-08-21 00:53:51 - INFO - [6356c394-1484-4391-b145-81215ba47ee8] Video saved to temporary file: temp_videos/6356c394-1484-4391-b145-81215ba47ee8.mp4 +2025-08-21 00:53:51 - INFO - [6356c394-1484-4391-b145-81215ba47ee8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:53:56 - INFO - [6356c394-1484-4391-b145-81215ba47ee8] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:53:56 - INFO - [6356c394-1484-4391-b145-81215ba47ee8] 30 frames saved to temp_videos/6356c394-1484-4391-b145-81215ba47ee8 +2025-08-21 00:53:56 - INFO - Prompt token length: 2305 +2025-08-21 00:54:02 - INFO - Tokens per second: 11.82995179235076, Peak GPU memory MB: 5348.375 +2025-08-21 00:54:02 - INFO - [6356c394-1484-4391-b145-81215ba47ee8] Inference time: 10.80 seconds, CPU usage: 53.8%, CPU core utilization: [49.9, 41.3, 78.9, 45.0] +2025-08-21 00:54:02 - INFO - [6356c394-1484-4391-b145-81215ba47ee8] Cleaned up temporary frame directory: temp_videos/6356c394-1484-4391-b145-81215ba47ee8 +2025-08-21 00:54:02 - INFO - [2999105b-10bc-497e-8931-352c7d9d65e6] Received new video inference request. Prompt: 'Summarize the key events in this convenience store video. Focus only on the actions and interactions of the people. Avoid repetitive descriptions of the store's layout or shelves.', Video: '/mnt/data/xiuying/Code/local_deploy/video/Clips_60s/sample_part_010.mp4' +2025-08-21 00:54:02 - INFO - [2999105b-10bc-497e-8931-352c7d9d65e6] Video saved to temporary file: temp_videos/2999105b-10bc-497e-8931-352c7d9d65e6.mp4 +2025-08-21 00:54:02 - INFO - [2999105b-10bc-497e-8931-352c7d9d65e6] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 00:54:07 - INFO - [2999105b-10bc-497e-8931-352c7d9d65e6] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 00:54:07 - INFO - [2999105b-10bc-497e-8931-352c7d9d65e6] 30 frames saved to temp_videos/2999105b-10bc-497e-8931-352c7d9d65e6 +2025-08-21 00:54:07 - INFO - Prompt token length: 2305 diff --git a/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_014204.log b/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_014204.log new file mode 100644 index 0000000000000000000000000000000000000000..fe3ec8ed4e780d5e14978dd0050b2cc9b00a1e47 --- /dev/null +++ b/API_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250821_014204.log @@ -0,0 +1,148 @@ +2025-08-21 01:42:04 - INFO - Loading model: Qwen/Qwen2.5-VL-3B-Instruct-AWQ +2025-08-21 01:42:09 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-21 01:42:40 - INFO - Model loaded in 35.77 seconds +2025-08-21 01:42:40 - INFO - GPU Memory Usage after model load: 3250.55 MB +2025-08-21 02:54:09 - INFO - [c40f2273-a9f5-4d96-82d4-990269ab9708] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_001.mp4' +2025-08-21 02:54:09 - INFO - [c40f2273-a9f5-4d96-82d4-990269ab9708] Video saved to temporary file: temp_videos/c40f2273-a9f5-4d96-82d4-990269ab9708.mp4 +2025-08-21 02:54:09 - INFO - [c40f2273-a9f5-4d96-82d4-990269ab9708] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 02:54:13 - INFO - [c40f2273-a9f5-4d96-82d4-990269ab9708] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 02:54:13 - INFO - [c40f2273-a9f5-4d96-82d4-990269ab9708] 30 frames saved to temp_videos/c40f2273-a9f5-4d96-82d4-990269ab9708 +2025-08-21 02:54:13 - INFO - Prompt token length: 2306 +2025-08-21 02:54:23 - INFO - Tokens per second: 11.859020159952623, Peak GPU memory MB: 5350.375 +2025-08-21 02:54:23 - INFO - [c40f2273-a9f5-4d96-82d4-990269ab9708] Inference time: 14.12 seconds, CPU usage: 2.0%, CPU core utilization: [2.0, 2.0, 1.9, 1.9] +2025-08-21 02:54:23 - INFO - [c40f2273-a9f5-4d96-82d4-990269ab9708] Cleaned up temporary frame directory: temp_videos/c40f2273-a9f5-4d96-82d4-990269ab9708 +2025-08-21 02:54:23 - INFO - [1bbf302e-4b0b-4363-bddd-3fb826552587] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_002.mp4' +2025-08-21 02:54:23 - INFO - [1bbf302e-4b0b-4363-bddd-3fb826552587] Video saved to temporary file: temp_videos/1bbf302e-4b0b-4363-bddd-3fb826552587.mp4 +2025-08-21 02:54:23 - INFO - [1bbf302e-4b0b-4363-bddd-3fb826552587] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 02:54:27 - INFO - [1bbf302e-4b0b-4363-bddd-3fb826552587] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 02:54:27 - INFO - [1bbf302e-4b0b-4363-bddd-3fb826552587] 30 frames saved to temp_videos/1bbf302e-4b0b-4363-bddd-3fb826552587 +2025-08-21 02:54:27 - INFO - Prompt token length: 2306 +2025-08-21 02:54:34 - INFO - Tokens per second: 12.033912631174916, Peak GPU memory MB: 5350.375 +2025-08-21 02:54:34 - INFO - [1bbf302e-4b0b-4363-bddd-3fb826552587] Inference time: 10.49 seconds, CPU usage: 44.1%, CPU core utilization: [80.0, 27.5, 40.1, 29.0] +2025-08-21 02:54:34 - INFO - [1bbf302e-4b0b-4363-bddd-3fb826552587] Cleaned up temporary frame directory: temp_videos/1bbf302e-4b0b-4363-bddd-3fb826552587 +2025-08-21 02:54:34 - INFO - [48b38709-fb9f-4c1d-9db6-279fea58e01f] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_003.mp4' +2025-08-21 02:54:34 - INFO - [48b38709-fb9f-4c1d-9db6-279fea58e01f] Video saved to temporary file: temp_videos/48b38709-fb9f-4c1d-9db6-279fea58e01f.mp4 +2025-08-21 02:54:34 - INFO - [48b38709-fb9f-4c1d-9db6-279fea58e01f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 02:54:37 - INFO - [48b38709-fb9f-4c1d-9db6-279fea58e01f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 02:54:37 - INFO - [48b38709-fb9f-4c1d-9db6-279fea58e01f] 30 frames saved to temp_videos/48b38709-fb9f-4c1d-9db6-279fea58e01f +2025-08-21 02:54:37 - INFO - Prompt token length: 2306 +2025-08-21 02:54:45 - INFO - Tokens per second: 11.980873759204092, Peak GPU memory MB: 5350.375 +2025-08-21 02:54:45 - INFO - [48b38709-fb9f-4c1d-9db6-279fea58e01f] Inference time: 10.84 seconds, CPU usage: 43.7%, CPU core utilization: [49.1, 32.4, 65.3, 27.8] +2025-08-21 02:54:45 - INFO - [48b38709-fb9f-4c1d-9db6-279fea58e01f] Cleaned up temporary frame directory: temp_videos/48b38709-fb9f-4c1d-9db6-279fea58e01f +2025-08-21 02:54:45 - INFO - [218b6cb4-0c13-4223-b6be-fbc881774b17] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_004.mp4' +2025-08-21 02:54:45 - INFO - [218b6cb4-0c13-4223-b6be-fbc881774b17] Video saved to temporary file: temp_videos/218b6cb4-0c13-4223-b6be-fbc881774b17.mp4 +2025-08-21 02:54:45 - INFO - [218b6cb4-0c13-4223-b6be-fbc881774b17] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 02:54:48 - INFO - [218b6cb4-0c13-4223-b6be-fbc881774b17] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 02:54:48 - INFO - [218b6cb4-0c13-4223-b6be-fbc881774b17] 30 frames saved to temp_videos/218b6cb4-0c13-4223-b6be-fbc881774b17 +2025-08-21 02:54:48 - INFO - Prompt token length: 2306 +2025-08-21 02:55:13 - INFO - Tokens per second: 11.894932301505968, Peak GPU memory MB: 5350.375 +2025-08-21 02:55:13 - INFO - [218b6cb4-0c13-4223-b6be-fbc881774b17] Inference time: 27.98 seconds, CPU usage: 33.8%, CPU core utilization: [13.9, 45.9, 13.3, 61.9] +2025-08-21 02:55:13 - INFO - [218b6cb4-0c13-4223-b6be-fbc881774b17] Cleaned up temporary frame directory: temp_videos/218b6cb4-0c13-4223-b6be-fbc881774b17 +2025-08-21 02:55:13 - INFO - [6550b43c-430e-4dee-8467-1a05b4c082cd] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_005.mp4' +2025-08-21 02:55:13 - INFO - [6550b43c-430e-4dee-8467-1a05b4c082cd] Video saved to temporary file: temp_videos/6550b43c-430e-4dee-8467-1a05b4c082cd.mp4 +2025-08-21 02:55:13 - INFO - [6550b43c-430e-4dee-8467-1a05b4c082cd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 02:55:16 - INFO - [6550b43c-430e-4dee-8467-1a05b4c082cd] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 02:55:16 - INFO - [6550b43c-430e-4dee-8467-1a05b4c082cd] 30 frames saved to temp_videos/6550b43c-430e-4dee-8467-1a05b4c082cd +2025-08-21 02:55:16 - INFO - Prompt token length: 2306 +2025-08-21 02:55:25 - INFO - Tokens per second: 11.99842860278374, Peak GPU memory MB: 5350.375 +2025-08-21 02:55:25 - INFO - [6550b43c-430e-4dee-8467-1a05b4c082cd] Inference time: 12.41 seconds, CPU usage: 40.7%, CPU core utilization: [34.0, 38.6, 64.3, 25.9] +2025-08-21 02:55:25 - INFO - [6550b43c-430e-4dee-8467-1a05b4c082cd] Cleaned up temporary frame directory: temp_videos/6550b43c-430e-4dee-8467-1a05b4c082cd +2025-08-21 02:55:25 - INFO - [172a602d-213b-41d6-b892-e7ca06e535bc] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_006.mp4' +2025-08-21 02:55:25 - INFO - [172a602d-213b-41d6-b892-e7ca06e535bc] Video saved to temporary file: temp_videos/172a602d-213b-41d6-b892-e7ca06e535bc.mp4 +2025-08-21 02:55:25 - INFO - [172a602d-213b-41d6-b892-e7ca06e535bc] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 02:55:28 - INFO - [172a602d-213b-41d6-b892-e7ca06e535bc] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 02:55:28 - INFO - [172a602d-213b-41d6-b892-e7ca06e535bc] 30 frames saved to temp_videos/172a602d-213b-41d6-b892-e7ca06e535bc +2025-08-21 02:55:29 - INFO - Prompt token length: 2306 +2025-08-21 02:55:40 - INFO - Tokens per second: 11.862422969421846, Peak GPU memory MB: 5350.375 +2025-08-21 02:55:40 - INFO - [172a602d-213b-41d6-b892-e7ca06e535bc] Inference time: 15.04 seconds, CPU usage: 39.3%, CPU core utilization: [21.5, 43.6, 21.6, 70.6] +2025-08-21 02:55:40 - INFO - [172a602d-213b-41d6-b892-e7ca06e535bc] Cleaned up temporary frame directory: temp_videos/172a602d-213b-41d6-b892-e7ca06e535bc +2025-08-21 02:55:40 - INFO - [082b484d-e219-4cde-ac8e-8af5b8f380cd] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_007.mp4' +2025-08-21 02:55:40 - INFO - [082b484d-e219-4cde-ac8e-8af5b8f380cd] Video saved to temporary file: temp_videos/082b484d-e219-4cde-ac8e-8af5b8f380cd.mp4 +2025-08-21 02:55:40 - INFO - [082b484d-e219-4cde-ac8e-8af5b8f380cd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 02:55:43 - INFO - [082b484d-e219-4cde-ac8e-8af5b8f380cd] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 02:55:43 - INFO - [082b484d-e219-4cde-ac8e-8af5b8f380cd] 30 frames saved to temp_videos/082b484d-e219-4cde-ac8e-8af5b8f380cd +2025-08-21 02:55:44 - INFO - Prompt token length: 2306 +2025-08-21 02:55:52 - INFO - Tokens per second: 12.007495276914103, Peak GPU memory MB: 5350.375 +2025-08-21 02:55:52 - INFO - [082b484d-e219-4cde-ac8e-8af5b8f380cd] Inference time: 11.83 seconds, CPU usage: 42.8%, CPU core utilization: [60.5, 34.4, 49.7, 26.5] +2025-08-21 02:55:52 - INFO - [082b484d-e219-4cde-ac8e-8af5b8f380cd] Cleaned up temporary frame directory: temp_videos/082b484d-e219-4cde-ac8e-8af5b8f380cd +2025-08-21 02:55:52 - INFO - [d4aec199-0b7e-4058-b8ba-bdfbb7806fca] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_008.mp4' +2025-08-21 02:55:52 - INFO - [d4aec199-0b7e-4058-b8ba-bdfbb7806fca] Video saved to temporary file: temp_videos/d4aec199-0b7e-4058-b8ba-bdfbb7806fca.mp4 +2025-08-21 02:55:52 - INFO - [d4aec199-0b7e-4058-b8ba-bdfbb7806fca] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 02:55:55 - INFO - [d4aec199-0b7e-4058-b8ba-bdfbb7806fca] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 02:55:55 - INFO - [d4aec199-0b7e-4058-b8ba-bdfbb7806fca] 30 frames saved to temp_videos/d4aec199-0b7e-4058-b8ba-bdfbb7806fca +2025-08-21 02:55:56 - INFO - Prompt token length: 2306 +2025-08-21 02:56:04 - INFO - Tokens per second: 11.871294681994929, Peak GPU memory MB: 5350.375 +2025-08-21 02:56:04 - INFO - [d4aec199-0b7e-4058-b8ba-bdfbb7806fca] Inference time: 12.13 seconds, CPU usage: 43.3%, CPU core utilization: [35.9, 32.5, 78.1, 26.8] +2025-08-21 02:56:04 - INFO - [d4aec199-0b7e-4058-b8ba-bdfbb7806fca] Cleaned up temporary frame directory: temp_videos/d4aec199-0b7e-4058-b8ba-bdfbb7806fca +2025-08-21 02:56:04 - INFO - [20eacc2f-2a33-4211-b488-f449c4bbc64d] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_009.mp4' +2025-08-21 02:56:04 - INFO - [20eacc2f-2a33-4211-b488-f449c4bbc64d] Video saved to temporary file: temp_videos/20eacc2f-2a33-4211-b488-f449c4bbc64d.mp4 +2025-08-21 02:56:04 - INFO - [20eacc2f-2a33-4211-b488-f449c4bbc64d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 02:56:07 - INFO - [20eacc2f-2a33-4211-b488-f449c4bbc64d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 02:56:07 - INFO - [20eacc2f-2a33-4211-b488-f449c4bbc64d] 30 frames saved to temp_videos/20eacc2f-2a33-4211-b488-f449c4bbc64d +2025-08-21 02:56:08 - INFO - Prompt token length: 2306 +2025-08-21 02:56:15 - INFO - Tokens per second: 11.63501242448262, Peak GPU memory MB: 5350.375 +2025-08-21 02:56:15 - INFO - [20eacc2f-2a33-4211-b488-f449c4bbc64d] Inference time: 10.73 seconds, CPU usage: 46.3%, CPU core utilization: [38.3, 58.2, 31.7, 56.7] +2025-08-21 02:56:15 - INFO - [20eacc2f-2a33-4211-b488-f449c4bbc64d] Cleaned up temporary frame directory: temp_videos/20eacc2f-2a33-4211-b488-f449c4bbc64d +2025-08-21 02:56:15 - INFO - [7bd61912-f2d8-49f3-a1d2-d25a5bb09ff5] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_010.mp4' +2025-08-21 02:56:15 - INFO - [7bd61912-f2d8-49f3-a1d2-d25a5bb09ff5] Video saved to temporary file: temp_videos/7bd61912-f2d8-49f3-a1d2-d25a5bb09ff5.mp4 +2025-08-21 02:56:15 - INFO - [7bd61912-f2d8-49f3-a1d2-d25a5bb09ff5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 02:56:18 - INFO - [7bd61912-f2d8-49f3-a1d2-d25a5bb09ff5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 02:56:18 - INFO - [7bd61912-f2d8-49f3-a1d2-d25a5bb09ff5] 30 frames saved to temp_videos/7bd61912-f2d8-49f3-a1d2-d25a5bb09ff5 +2025-08-21 02:56:18 - INFO - Prompt token length: 2306 +2025-08-21 02:56:31 - INFO - Tokens per second: 11.874488678953208, Peak GPU memory MB: 5350.375 +2025-08-21 02:56:31 - INFO - [7bd61912-f2d8-49f3-a1d2-d25a5bb09ff5] Inference time: 16.08 seconds, CPU usage: 37.8%, CPU core utilization: [19.6, 68.7, 18.4, 44.3] +2025-08-21 02:56:31 - INFO - [7bd61912-f2d8-49f3-a1d2-d25a5bb09ff5] Cleaned up temporary frame directory: temp_videos/7bd61912-f2d8-49f3-a1d2-d25a5bb09ff5 +2025-08-21 02:56:31 - INFO - [305ccf60-14df-466d-8565-f04265430ba1] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_011.mp4' +2025-08-21 02:56:31 - INFO - [305ccf60-14df-466d-8565-f04265430ba1] Video saved to temporary file: temp_videos/305ccf60-14df-466d-8565-f04265430ba1.mp4 +2025-08-21 02:56:31 - INFO - [305ccf60-14df-466d-8565-f04265430ba1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 02:56:34 - INFO - [305ccf60-14df-466d-8565-f04265430ba1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 02:56:34 - INFO - [305ccf60-14df-466d-8565-f04265430ba1] 30 frames saved to temp_videos/305ccf60-14df-466d-8565-f04265430ba1 +2025-08-21 02:56:35 - INFO - Prompt token length: 2306 +2025-08-21 02:56:44 - INFO - Tokens per second: 11.829041430743297, Peak GPU memory MB: 5350.375 +2025-08-21 02:56:44 - INFO - [305ccf60-14df-466d-8565-f04265430ba1] Inference time: 12.93 seconds, CPU usage: 42.0%, CPU core utilization: [28.8, 42.5, 25.9, 70.9] +2025-08-21 02:56:44 - INFO - [305ccf60-14df-466d-8565-f04265430ba1] Cleaned up temporary frame directory: temp_videos/305ccf60-14df-466d-8565-f04265430ba1 +2025-08-21 02:56:44 - INFO - [659dc8e0-c40a-432f-887e-c9cdeefc17a4] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_012.mp4' +2025-08-21 02:56:44 - INFO - [659dc8e0-c40a-432f-887e-c9cdeefc17a4] Video saved to temporary file: temp_videos/659dc8e0-c40a-432f-887e-c9cdeefc17a4.mp4 +2025-08-21 02:56:44 - INFO - [659dc8e0-c40a-432f-887e-c9cdeefc17a4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 02:56:47 - INFO - [659dc8e0-c40a-432f-887e-c9cdeefc17a4] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 02:56:47 - INFO - [659dc8e0-c40a-432f-887e-c9cdeefc17a4] 30 frames saved to temp_videos/659dc8e0-c40a-432f-887e-c9cdeefc17a4 +2025-08-21 02:56:48 - INFO - Prompt token length: 2306 +2025-08-21 02:56:58 - INFO - Tokens per second: 11.928726359703456, Peak GPU memory MB: 5350.375 +2025-08-21 02:56:58 - INFO - [659dc8e0-c40a-432f-887e-c9cdeefc17a4] Inference time: 13.75 seconds, CPU usage: 39.8%, CPU core utilization: [31.6, 62.3, 41.3, 23.8] +2025-08-21 02:56:58 - INFO - [659dc8e0-c40a-432f-887e-c9cdeefc17a4] Cleaned up temporary frame directory: temp_videos/659dc8e0-c40a-432f-887e-c9cdeefc17a4 +2025-08-21 02:56:58 - INFO - [05a4c1b9-d6d6-4e4e-a0f2-f58a0664c989] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_013.mp4' +2025-08-21 02:56:58 - INFO - [05a4c1b9-d6d6-4e4e-a0f2-f58a0664c989] Video saved to temporary file: temp_videos/05a4c1b9-d6d6-4e4e-a0f2-f58a0664c989.mp4 +2025-08-21 02:56:58 - INFO - [05a4c1b9-d6d6-4e4e-a0f2-f58a0664c989] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 02:57:01 - INFO - [05a4c1b9-d6d6-4e4e-a0f2-f58a0664c989] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 02:57:01 - INFO - [05a4c1b9-d6d6-4e4e-a0f2-f58a0664c989] 30 frames saved to temp_videos/05a4c1b9-d6d6-4e4e-a0f2-f58a0664c989 +2025-08-21 02:57:01 - INFO - Prompt token length: 2306 +2025-08-21 02:57:07 - INFO - Tokens per second: 12.014726651428436, Peak GPU memory MB: 5350.375 +2025-08-21 02:57:07 - INFO - [05a4c1b9-d6d6-4e4e-a0f2-f58a0664c989] Inference time: 9.37 seconds, CPU usage: 43.8%, CPU core utilization: [29.4, 29.6, 88.1, 27.9] +2025-08-21 02:57:07 - INFO - [05a4c1b9-d6d6-4e4e-a0f2-f58a0664c989] Cleaned up temporary frame directory: temp_videos/05a4c1b9-d6d6-4e4e-a0f2-f58a0664c989 +2025-08-21 02:57:07 - INFO - [0f5076d3-96af-4d28-be73-0db23c76eaf4] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_014.mp4' +2025-08-21 02:57:07 - INFO - [0f5076d3-96af-4d28-be73-0db23c76eaf4] Video saved to temporary file: temp_videos/0f5076d3-96af-4d28-be73-0db23c76eaf4.mp4 +2025-08-21 02:57:07 - INFO - [0f5076d3-96af-4d28-be73-0db23c76eaf4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 02:57:10 - INFO - [0f5076d3-96af-4d28-be73-0db23c76eaf4] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 02:57:10 - INFO - [0f5076d3-96af-4d28-be73-0db23c76eaf4] 30 frames saved to temp_videos/0f5076d3-96af-4d28-be73-0db23c76eaf4 +2025-08-21 02:57:11 - INFO - Prompt token length: 2306 +2025-08-21 02:57:19 - INFO - Tokens per second: 11.861972979079045, Peak GPU memory MB: 5350.375 +2025-08-21 02:57:19 - INFO - [0f5076d3-96af-4d28-be73-0db23c76eaf4] Inference time: 11.61 seconds, CPU usage: 41.6%, CPU core utilization: [42.9, 26.4, 27.0, 69.9] +2025-08-21 02:57:19 - INFO - [0f5076d3-96af-4d28-be73-0db23c76eaf4] Cleaned up temporary frame directory: temp_videos/0f5076d3-96af-4d28-be73-0db23c76eaf4 +2025-08-21 02:57:19 - INFO - [a60e4adc-5a10-496e-8dba-e95fa8204801] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_015.mp4' +2025-08-21 02:57:19 - INFO - [a60e4adc-5a10-496e-8dba-e95fa8204801] Video saved to temporary file: temp_videos/a60e4adc-5a10-496e-8dba-e95fa8204801.mp4 +2025-08-21 02:57:19 - INFO - [a60e4adc-5a10-496e-8dba-e95fa8204801] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 02:57:22 - INFO - [a60e4adc-5a10-496e-8dba-e95fa8204801] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 02:57:22 - INFO - [a60e4adc-5a10-496e-8dba-e95fa8204801] 30 frames saved to temp_videos/a60e4adc-5a10-496e-8dba-e95fa8204801 +2025-08-21 02:57:22 - INFO - Prompt token length: 2306 +2025-08-21 02:57:31 - INFO - Tokens per second: 12.034885208983422, Peak GPU memory MB: 5350.375 +2025-08-21 02:57:31 - INFO - [a60e4adc-5a10-496e-8dba-e95fa8204801] Inference time: 12.68 seconds, CPU usage: 39.5%, CPU core utilization: [59.8, 22.7, 53.7, 22.1] +2025-08-21 02:57:31 - INFO - [a60e4adc-5a10-496e-8dba-e95fa8204801] Cleaned up temporary frame directory: temp_videos/a60e4adc-5a10-496e-8dba-e95fa8204801 +2025-08-21 02:57:31 - INFO - [262c15ae-e353-4d00-b508-c4d77d75300a] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s/video_part_016.mp4' +2025-08-21 02:57:31 - INFO - [262c15ae-e353-4d00-b508-c4d77d75300a] Video saved to temporary file: temp_videos/262c15ae-e353-4d00-b508-c4d77d75300a.mp4 +2025-08-21 02:57:31 - INFO - [262c15ae-e353-4d00-b508-c4d77d75300a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-21 02:57:35 - INFO - [262c15ae-e353-4d00-b508-c4d77d75300a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-21 02:57:35 - INFO - [262c15ae-e353-4d00-b508-c4d77d75300a] 30 frames saved to temp_videos/262c15ae-e353-4d00-b508-c4d77d75300a +2025-08-21 02:57:35 - INFO - Prompt token length: 2306 +2025-08-21 02:57:59 - INFO - Tokens per second: 12.052444962168167, Peak GPU memory MB: 5350.375 +2025-08-21 02:57:59 - INFO - [262c15ae-e353-4d00-b508-c4d77d75300a] Inference time: 27.71 seconds, CPU usage: 33.2%, CPU core utilization: [31.4, 17.1, 70.7, 13.4] +2025-08-21 02:57:59 - INFO - [262c15ae-e353-4d00-b508-c4d77d75300a] Cleaned up temporary frame directory: temp_videos/262c15ae-e353-4d00-b508-c4d77d75300a diff --git a/API_Transformers/logs/gemma-3-4b-it/20250819_005014.log b/API_Transformers/logs/gemma-3-4b-it/20250819_005014.log new file mode 100644 index 0000000000000000000000000000000000000000..faf7ae99e0af864fbd42358c55589aa39f35a9cb --- /dev/null +++ b/API_Transformers/logs/gemma-3-4b-it/20250819_005014.log @@ -0,0 +1,28 @@ +2025-08-19 00:50:14 - INFO - Loading model: google/gemma-3-4b-it +2025-08-19 00:50:16 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-19 00:51:28 - INFO - Model loaded in 73.81 seconds +2025-08-19 00:51:28 - INFO - GPU Memory Usage after model load: 8201.85 MB +2025-08-19 00:51:34 - INFO - [cd4de5c8-a57c-41ff-8d88-71dda9ce333f] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-19 00:51:34 - INFO - [cd4de5c8-a57c-41ff-8d88-71dda9ce333f] Video saved to temporary file: temp_videos/cd4de5c8-a57c-41ff-8d88-71dda9ce333f.mp4 +2025-08-19 00:51:34 - INFO - [cd4de5c8-a57c-41ff-8d88-71dda9ce333f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:51:37 - INFO - [cd4de5c8-a57c-41ff-8d88-71dda9ce333f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:51:37 - INFO - [cd4de5c8-a57c-41ff-8d88-71dda9ce333f] 30 frames saved to temp_videos/cd4de5c8-a57c-41ff-8d88-71dda9ce333f +2025-08-19 00:51:37 - ERROR - [cd4de5c8-a57c-41ff-8d88-71dda9ce333f] An error occurred during processing: Incorrect format used for image. Should be an url linking to an image, a base64 string, a local path, or a PIL image. +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/local_deploy/infer.py", line 107, in video_inference + output = model.generate(frame_paths, prompt) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/local_deploy/models/gemma.py", line 56, in generate + inputs = self.processor.apply_chat_template( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/utils/deprecation.py", line 172, in wrapped_func + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/processing_utils.py", line 1552, in apply_chat_template + images.append(load_image(fname)) + ^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/image_utils.py", line 493, in load_image + raise TypeError( +TypeError: Incorrect format used for image. Should be an url linking to an image, a base64 string, a local path, or a PIL image. 
+2025-08-19 00:51:37 - INFO - [cd4de5c8-a57c-41ff-8d88-71dda9ce333f] Cleaned up temporary file: temp_videos/cd4de5c8-a57c-41ff-8d88-71dda9ce333f.mp4 +2025-08-19 00:51:37 - INFO - [cd4de5c8-a57c-41ff-8d88-71dda9ce333f] Cleaned up temporary frame directory: temp_videos/cd4de5c8-a57c-41ff-8d88-71dda9ce333f diff --git a/API_Transformers/logs/gemma-3-4b-it/20250819_005535.log b/API_Transformers/logs/gemma-3-4b-it/20250819_005535.log new file mode 100644 index 0000000000000000000000000000000000000000..f68465262bcd827ed7df76e6cb65c6ce36ac6e6a --- /dev/null +++ b/API_Transformers/logs/gemma-3-4b-it/20250819_005535.log @@ -0,0 +1,10 @@ +2025-08-19 00:55:35 - INFO - Loading model: google/gemma-3-4b-it +2025-08-19 00:55:37 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-19 00:55:50 - INFO - Model loaded in 14.81 seconds +2025-08-19 00:55:50 - INFO - GPU Memory Usage after model load: 8201.85 MB +2025-08-19 00:55:58 - INFO - [0cfe1e16-f6d4-4f20-9091-9719eee547e3] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-19 00:55:58 - INFO - [0cfe1e16-f6d4-4f20-9091-9719eee547e3] Video saved to temporary file: temp_videos/0cfe1e16-f6d4-4f20-9091-9719eee547e3.mp4 +2025-08-19 00:55:58 - INFO - [0cfe1e16-f6d4-4f20-9091-9719eee547e3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-19 00:56:04 - INFO - [0cfe1e16-f6d4-4f20-9091-9719eee547e3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-19 00:56:04 - INFO - [0cfe1e16-f6d4-4f20-9091-9719eee547e3] 30 frames saved to temp_videos/0cfe1e16-f6d4-4f20-9091-9719eee547e3 +2025-08-19 00:56:05 - INFO - Prompt token length: 7961 diff --git a/API_Transformers/logs/gemma-3-4b-it/20250819_010310.log b/API_Transformers/logs/gemma-3-4b-it/20250819_010310.log new file mode 100644 index 0000000000000000000000000000000000000000..ce4c800b6cccb3553c93f62910f8d31218c84673 --- /dev/null +++ b/API_Transformers/logs/gemma-3-4b-it/20250819_010310.log @@ -0,0 +1,10 @@ +2025-08-19 01:03:10 - INFO - Loading model: google/gemma-3-4b-it +2025-08-19 01:03:11 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-19 01:03:37 - INFO - Model loaded in 26.97 seconds +2025-08-19 01:03:37 - INFO - GPU Memory Usage after model load: 8201.85 MB +2025-08-19 01:03:58 - INFO - [ddbb264c-a911-43d4-aee3-8aebd82a1e83] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-19 01:03:58 - INFO - [ddbb264c-a911-43d4-aee3-8aebd82a1e83] Video saved to temporary file: temp_videos/ddbb264c-a911-43d4-aee3-8aebd82a1e83.mp4 +2025-08-19 01:03:58 - INFO - [ddbb264c-a911-43d4-aee3-8aebd82a1e83] Extracting frames using method: uniform, rate/threshold: 5 +2025-08-19 01:03:58 - INFO - [ddbb264c-a911-43d4-aee3-8aebd82a1e83] Extracted 5 frames successfully. Saving to temporary files... 
+2025-08-19 01:03:58 - INFO - [ddbb264c-a911-43d4-aee3-8aebd82a1e83] 5 frames saved to temp_videos/ddbb264c-a911-43d4-aee3-8aebd82a1e83 +2025-08-19 01:03:58 - INFO - Prompt token length: 1317 diff --git a/API_Transformers/logs/gemma-3-4b-it/20250819_010523.log b/API_Transformers/logs/gemma-3-4b-it/20250819_010523.log new file mode 100644 index 0000000000000000000000000000000000000000..d2dd290dccf1fe969a2234197fea1e9faf1d9fdf --- /dev/null +++ b/API_Transformers/logs/gemma-3-4b-it/20250819_010523.log @@ -0,0 +1,10 @@ +2025-08-19 01:05:23 - INFO - Loading model: google/gemma-3-4b-it +2025-08-19 01:05:24 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-19 01:05:41 - INFO - Model loaded in 17.79 seconds +2025-08-19 01:05:41 - INFO - GPU Memory Usage after model load: 8201.85 MB +2025-08-19 01:05:53 - INFO - [918ef8af-5a2d-4683-95b6-92a2aa6dbe57] Received new video inference request. Prompt: 'Please describe the video.', Video: 'messi_part_001.mp4' +2025-08-19 01:05:53 - INFO - [918ef8af-5a2d-4683-95b6-92a2aa6dbe57] Video saved to temporary file: temp_videos/918ef8af-5a2d-4683-95b6-92a2aa6dbe57.mp4 +2025-08-19 01:05:53 - INFO - [918ef8af-5a2d-4683-95b6-92a2aa6dbe57] Extracting frames using method: uniform, rate/threshold: 1 +2025-08-19 01:05:53 - INFO - [918ef8af-5a2d-4683-95b6-92a2aa6dbe57] Extracted 1 frames successfully. Saving to temporary files... 
+2025-08-19 01:05:53 - INFO - [918ef8af-5a2d-4683-95b6-92a2aa6dbe57] 1 frames saved to temp_videos/918ef8af-5a2d-4683-95b6-92a2aa6dbe57 +2025-08-19 01:05:53 - INFO - Prompt token length: 281 diff --git a/API_Transformers/messi/Clips_30s/messi_part_001.mp4 b/API_Transformers/messi/Clips_30s/messi_part_001.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..13b90760b89630dbd94564526b7e436fd68528af --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_001.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:034781ddfc5a0fc650c1c5a4b2bde6c7899eacb39e1198194969ce59a7ea85f7 +size 5344002 diff --git a/API_Transformers/messi/Clips_30s/messi_part_002.mp4 b/API_Transformers/messi/Clips_30s/messi_part_002.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..5d4c4eafc5190ae6555f84a69da8bfe0bd7ff41f --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_002.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79ccc24659bf7c26880f5446a4e60bc0b53817c0c441224943757be6f9ce377b +size 5825993 diff --git a/API_Transformers/messi/Clips_30s/messi_part_003.mp4 b/API_Transformers/messi/Clips_30s/messi_part_003.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..197b50737ce532a21913ba2a1abb046d123c30ac --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_003.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d313ff247867cbbdf23c96ee18257fed74a06f03d7f8a051f4d7695015e0344 +size 6682487 diff --git a/API_Transformers/messi/Clips_30s/messi_part_004.mp4 b/API_Transformers/messi/Clips_30s/messi_part_004.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..8c8306cc371aa3551c1e4d61b038bfe4dac13499 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_004.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:969b5e7d4fec19eaa6f71ffe4e54c13bd197d8ca0274cb97384e2a425acfd73f +size 8157849 
diff --git a/API_Transformers/messi/Clips_30s/messi_part_005.mp4 b/API_Transformers/messi/Clips_30s/messi_part_005.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2b0fbde267c3897bb4a42c1f2167ff948afe247e --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_005.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a9a7f993c90830f50d2c8cc9192cdae2e1dbe5947a549af79aa37fdb56fdac8 +size 5229919 diff --git a/API_Transformers/messi/Clips_30s/messi_part_006.mp4 b/API_Transformers/messi/Clips_30s/messi_part_006.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..cf0d2145792233f57954c807d7377314939c2b9d --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_006.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6382294ddbfc91e4b0692b6b225c40e0df1222f3be2e96a5763227e1241ebf0a +size 6306804 diff --git a/API_Transformers/messi/Clips_30s/messi_part_007.mp4 b/API_Transformers/messi/Clips_30s/messi_part_007.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..57be51463cf6cd2b131d47a23e2a44af47eca811 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_007.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7f5a24fd86931bab0459103d7ceb1829f342c28f6a0f1069ad2cb528123aab5 +size 7639331 diff --git a/API_Transformers/messi/Clips_30s/messi_part_008.mp4 b/API_Transformers/messi/Clips_30s/messi_part_008.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..6c03b2b4f07da5d912b9269b54ff98f05ecd98d0 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_008.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80bbd46e02aaca2401dbbd4763de028db0c2bcbb92f1f857bb7cf6c9ebadd8e5 +size 6883576 diff --git a/API_Transformers/messi/Clips_30s/messi_part_009.mp4 b/API_Transformers/messi/Clips_30s/messi_part_009.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..0d4a06e332b5d76b4579a36f0fee5fe6ac27e0a9 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_009.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c400468949212177acae4555f65945c53dc0b19f514e7e397583b0a45393110 +size 6162521 diff --git a/API_Transformers/messi/Clips_30s/messi_part_010.mp4 b/API_Transformers/messi/Clips_30s/messi_part_010.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..dffcfa98a862f31eb4067a3aa047e2a8be8e2bd1 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_010.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc2beb3b4b0307c083b5162a7bde4103e80c5de8a83fbbc6b19c9ba65a0cd3d +size 5916943 diff --git a/API_Transformers/messi/Clips_30s/messi_part_011.mp4 b/API_Transformers/messi/Clips_30s/messi_part_011.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..052a630f5bd457f010fed7eb2ef7c84e0ad58fd2 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_011.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cad4d91b0576632538b3c256e11d4a899e025100866425b79d5fa9140618424b +size 6930061 diff --git a/API_Transformers/messi/Clips_30s/messi_part_012.mp4 b/API_Transformers/messi/Clips_30s/messi_part_012.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..db6b44865d797e5224f76f7fbb1da2d755f43c2a --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_012.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67112ba880f288f107cefba957ae9aa3d27b4fee8f587320f480c6c6fd58fcdf +size 6937495 diff --git a/API_Transformers/messi/Clips_30s/messi_part_013.mp4 b/API_Transformers/messi/Clips_30s/messi_part_013.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..5ee2f552b0365a037919d015d20bbd1de672810d --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_013.mp4 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:12f0c4eac052d78f367019a82a368d67525e80ad04389f78ca936e16d82ec031 +size 5529421 diff --git a/API_Transformers/messi/Clips_30s/messi_part_014.mp4 b/API_Transformers/messi/Clips_30s/messi_part_014.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..9d6f92152f66b5801758f6ab17992957f9548cee --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_014.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28d535152382212cca7fb8cbdf1d0799aceabb9d0760652d5991bd544735a91c +size 5996084 diff --git a/API_Transformers/messi/Clips_30s/messi_part_015.mp4 b/API_Transformers/messi/Clips_30s/messi_part_015.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..10948932954cdb8a6c020119c79864e512decee1 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_015.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15e44367131009ab302f6aa8e1df4e48aac07c57bc027c958a265dfeb7d5e9b8 +size 5802513 diff --git a/API_Transformers/messi/Clips_30s/messi_part_016.mp4 b/API_Transformers/messi/Clips_30s/messi_part_016.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..c665bce89874388fb29765bf64d6abbf30325701 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_016.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20cb91910b480991ad870aad8d88698695f4b8b70a6833ab4cd4e97d6664dc53 +size 5687903 diff --git a/API_Transformers/messi/Clips_30s/messi_part_017.mp4 b/API_Transformers/messi/Clips_30s/messi_part_017.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2bbb7bc928df45986516a4950a3bf002d194b911 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_017.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf809ef6c6d9dc6b873c9db70040d3687fb96591e132e5aeb1649a3ae1093e41 +size 5653555 diff --git a/API_Transformers/messi/Clips_30s/messi_part_018.mp4 
b/API_Transformers/messi/Clips_30s/messi_part_018.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2b172bab3482517d8404d439466bd06e97aeba52 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_018.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c65c4c0e5a475c68d4cab4ef7d28a8e7197eebb777e1bef9a3e3d2de7e7cc8a +size 5249682 diff --git a/API_Transformers/messi/Clips_30s/messi_part_019.mp4 b/API_Transformers/messi/Clips_30s/messi_part_019.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..29237c587c8a0c14a9e58ae4383784ad7cb9b0db --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_019.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a35d93b829c31171b99690453c8f3f3bbcfd083a1520a33f10b85f632d7351ba +size 6417139 diff --git a/API_Transformers/messi/Clips_30s/messi_part_020.mp4 b/API_Transformers/messi/Clips_30s/messi_part_020.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..50f687d58f54a4167b2cb5dad10c666f7d10d2dc --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_020.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4133ed2fd3ab8d3b16b0bd3f2a6a33ef1bd681483a8b22437a50acac2526096 +size 6748670 diff --git a/API_Transformers/messi/Clips_30s/messi_part_021.mp4 b/API_Transformers/messi/Clips_30s/messi_part_021.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..d708df200970b03171177017d541d3ecc9520299 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_021.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f836ddc1563250097a1de8b561cf706217c51cc99fecf37d78f3d521c0ae26e +size 5065872 diff --git a/API_Transformers/messi/Clips_30s/messi_part_022.mp4 b/API_Transformers/messi/Clips_30s/messi_part_022.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..5e8cb55b8501ba30ed4d11e3064a4afd318d9cbe --- /dev/null +++ 
b/API_Transformers/messi/Clips_30s/messi_part_022.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:467d054565285b4a8ae85eca1f6c09da14a24085ad36b026ca60c18c0923013a +size 6540457 diff --git a/API_Transformers/messi/Clips_30s/messi_part_023.mp4 b/API_Transformers/messi/Clips_30s/messi_part_023.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..9c06f04d1ffd1a1254d21189c53451dda156ef78 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_023.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f43bd12af9620360f6adac327ad171b66611655cd476302745c82efe1236cd4 +size 5302030 diff --git a/API_Transformers/messi/Clips_30s/messi_part_024.mp4 b/API_Transformers/messi/Clips_30s/messi_part_024.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..5c290dd21f05bcdb740e6270c7d91ae2d40aebc0 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_024.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:127c9be7187426559433652911735152dd93b8c0e11ad2d36ce4b6966ec33052 +size 4969929 diff --git a/API_Transformers/messi/Clips_30s/messi_part_025.mp4 b/API_Transformers/messi/Clips_30s/messi_part_025.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..dfc674be3017fba63db56667da16978a993fadca --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_025.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13854acb3dcfa9e91e5d73b0dced6181f8cb87af280735c2b4c9693e6a338458 +size 6651879 diff --git a/API_Transformers/messi/Clips_30s/messi_part_026.mp4 b/API_Transformers/messi/Clips_30s/messi_part_026.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b7cfdd8416f3eac1d2712856f0dc537023b98391 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_026.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:180993cfb9065fa358a7954f8d0bb3c3acd44cb4bbf9060288dcce3aebd200d2 
+size 6869755 diff --git a/API_Transformers/messi/Clips_30s/messi_part_027.mp4 b/API_Transformers/messi/Clips_30s/messi_part_027.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..61055c559cbe38c79af7b7cedeb9e7b3012c045a --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_027.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75d525af1a3da8e6803149810b643763d26899df41de6903e2db19093267f5e7 +size 5062680 diff --git a/API_Transformers/messi/Clips_30s/messi_part_028.mp4 b/API_Transformers/messi/Clips_30s/messi_part_028.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..68c9d07e4e68611a1631e2e1f1f9cf9314ff7d4e --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_028.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ca9915d050462e1829a8d43cde9969e183ced2d92dddacd9b14eb4231cb4961 +size 5634441 diff --git a/API_Transformers/messi/Clips_30s/messi_part_029.mp4 b/API_Transformers/messi/Clips_30s/messi_part_029.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..08439c5ecac1db3c737517bafc4f563900968718 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_029.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f20d1ada1e8a551f2b7d271695f24e2993a0e840067b3aff7f946375a64aa7a +size 6737202 diff --git a/API_Transformers/messi/Clips_30s/messi_part_030.mp4 b/API_Transformers/messi/Clips_30s/messi_part_030.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..6ed5951574ec1e97770f954b0d0f255cedbea92b --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_030.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4dfbe7bf8e0d022a7e31c57b4e2ef449435f32d9a055e87a157afe9484d6d75 +size 8423255 diff --git a/API_Transformers/messi/Clips_30s/messi_part_031.mp4 b/API_Transformers/messi/Clips_30s/messi_part_031.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..f69ef3e5306ed721a727019b1d59fd2641d7d319 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_031.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca4e4cefa602c476e313e8eed3c86d12432ad2bee673f3e511aa8f82f2ae52bf +size 6413575 diff --git a/API_Transformers/messi/Clips_30s/messi_part_032.mp4 b/API_Transformers/messi/Clips_30s/messi_part_032.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b370712e1283721c4b7c3b0b66a288dfc61666cf --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_032.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4334b36a8715d80c2549ff13bd9fc2c09c7a53057f8df89c8f4a1eceddd35cd9 +size 6054474 diff --git a/API_Transformers/messi/Clips_30s/messi_part_033.mp4 b/API_Transformers/messi/Clips_30s/messi_part_033.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..d66080d41f6e9d7fc124e7f89ccbae672df88bfe --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_033.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:301327e8d5bd84b2fee7e18591f3e528f2aff1766223fd209587c904a228f5d0 +size 7681841 diff --git a/API_Transformers/messi/Clips_30s/messi_part_034.mp4 b/API_Transformers/messi/Clips_30s/messi_part_034.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..876a8f879814a33772d97e6c38fa43b50db9b773 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_034.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:074f56abfcbe88880a9a744d00a99a6de9d5dccee28bfdfea30661b356bc4d47 +size 6436689 diff --git a/API_Transformers/messi/Clips_30s/messi_part_035.mp4 b/API_Transformers/messi/Clips_30s/messi_part_035.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..bc29ab1fd40cbd9a21a3ab07ef4026e6490c628e --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_035.mp4 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:1a39dee77a12ef405d7069e65f2a04edf710e959e543f4e8cc4a8350a42d5836 +size 6352833 diff --git a/API_Transformers/messi/Clips_30s/messi_part_036.mp4 b/API_Transformers/messi/Clips_30s/messi_part_036.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..d22e6d10b0782b6db1edbd2c3d4e80e63b89a929 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_036.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ece5855c58d6da4457b4530c8cfbedb6383d0ec0506044b7073636983a540bad +size 7215247 diff --git a/API_Transformers/messi/Clips_30s/messi_part_037.mp4 b/API_Transformers/messi/Clips_30s/messi_part_037.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..95f7326ffa919c3ee6c25e4fbff1f044ec87a4e4 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_037.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34be14c7c2115ce7b6266ed5fa82923631af08bf44d7f505b54939cdf735ba3e +size 5626018 diff --git a/API_Transformers/messi/Clips_30s/messi_part_038.mp4 b/API_Transformers/messi/Clips_30s/messi_part_038.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..45fc9ef6603bcba2e67f98364482b07fd7afd441 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_038.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dac94e302aa5635bf2fd9e8d2726020ae48ee6cf71aa0072917b1acb40ba0d3 +size 6619333 diff --git a/API_Transformers/messi/Clips_30s/messi_part_039.mp4 b/API_Transformers/messi/Clips_30s/messi_part_039.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..69864fa0b030d0a9abc8fb84fe4bb0b2786f311f --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_039.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2cd564225e8c7802484ad79bb5740cda0f11f3f28f5f2cb67733b623ab7d606 +size 6054425 diff --git a/API_Transformers/messi/Clips_30s/messi_part_040.mp4 
b/API_Transformers/messi/Clips_30s/messi_part_040.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..596792e85fd4e85328d034c662fc28b37fbaa5b7 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_040.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a893fa79501a97f42ad7abfcc8f77d71c2cc669576a241d9a51cce749e4958 +size 5980803 diff --git a/API_Transformers/messi/Clips_30s/messi_part_041.mp4 b/API_Transformers/messi/Clips_30s/messi_part_041.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..02f09fc5e14ec4bd793f30667d086bc7ac746a1d --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_041.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03a7cb84d32d3706717fea9730baddf8ec1aa775563530e97041f8533abdb6ac +size 5260421 diff --git a/API_Transformers/messi/Clips_30s/messi_part_042.mp4 b/API_Transformers/messi/Clips_30s/messi_part_042.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..439a2e74c480f8e2f5d247b1e580f650a42e6c3e --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_042.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1711e544f99089df9ef40c97a5101205040e71cfd70882e7805d8f2afe12f8e2 +size 8964116 diff --git a/API_Transformers/messi/Clips_30s/messi_part_043.mp4 b/API_Transformers/messi/Clips_30s/messi_part_043.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..4aa3843de2e601bc734a6645357e37acee0f05ac --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_043.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f25ffb47dd51bbc605cce43f5c62d7174d6094f218d548b4e72435d89d00f0a +size 7544464 diff --git a/API_Transformers/messi/Clips_30s/messi_part_044.mp4 b/API_Transformers/messi/Clips_30s/messi_part_044.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..1626fbf8b397293cea197b086aa15a34e0f98b23 --- /dev/null +++ 
b/API_Transformers/messi/Clips_30s/messi_part_044.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a466008a6515808d6fde6043b629e369c06ee690c4e474f78e04806d70ceac3 +size 5735755 diff --git a/API_Transformers/messi/Clips_30s/messi_part_045.mp4 b/API_Transformers/messi/Clips_30s/messi_part_045.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..865f33eac82c104558655acaa1326c5323ca65bb --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_045.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39662daf4cf2508106c90536f74f3b6fd59aff1b4bf490f9f9c391235d146b83 +size 6434796 diff --git a/API_Transformers/messi/Clips_30s/messi_part_046.mp4 b/API_Transformers/messi/Clips_30s/messi_part_046.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..36715862fd26f654879783e29cac73f60e03d05d --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_046.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3124fb1b6b6f78a63baa5551c98315a30e7bf3c5a259e84b769ac4e7946f8477 +size 5605203 diff --git a/API_Transformers/messi/Clips_30s/messi_part_047.mp4 b/API_Transformers/messi/Clips_30s/messi_part_047.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..5b7c4af18a07c34e985f846388c64034ceda0bf1 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_047.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef7d35268017a9ad1a119a8b59e9a01234ac9b01bdaa311f1d806ac1378158bc +size 5885236 diff --git a/API_Transformers/messi/Clips_30s/messi_part_048.mp4 b/API_Transformers/messi/Clips_30s/messi_part_048.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..489d6810a4e55af680a08e06bd5264d28c84839b --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_048.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:929db09ea1bbbcdcbfed9c01b03da4b660429d0dac526ce88375a9959e25c4f9 
+size 6273253 diff --git a/API_Transformers/messi/Clips_30s/messi_part_049.mp4 b/API_Transformers/messi/Clips_30s/messi_part_049.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..bdfc8b6af31fea7bd7cb5c3bb869de26f9815463 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_049.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:868116b8133be0d43509bca47c18395ca6a3c5081aef1be4ec6a74dbf2e2fab3 +size 5154878 diff --git a/API_Transformers/messi/Clips_30s/messi_part_050.mp4 b/API_Transformers/messi/Clips_30s/messi_part_050.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..4b275aa4be6ca439e53673ad25e3833f3b10179a --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_050.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c25435c1a85d1690671f1f6dbccfc465bc2c0299d40956281ebace6b71c2d1c +size 7514685 diff --git a/API_Transformers/messi/Clips_30s/messi_part_051.mp4 b/API_Transformers/messi/Clips_30s/messi_part_051.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..23d6876e27a2f6e34ba81c11a65bdbb41d79a38d --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_051.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:804055b42517e80f9eca8264154652e76dc2e7e66a85e43e923eaa2b40005236 +size 6688144 diff --git a/API_Transformers/messi/Clips_30s/messi_part_052.mp4 b/API_Transformers/messi/Clips_30s/messi_part_052.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..ef3535a8ac24be4b4837bb6d231032aef21fb327 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_052.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba9244b26390687d9f0f1bec14cfd2cf6f927199d99aa3a76c77a8bb43871901 +size 5682828 diff --git a/API_Transformers/messi/Clips_30s/messi_part_053.mp4 b/API_Transformers/messi/Clips_30s/messi_part_053.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..ae34e79b72296959ad70a516a587781c4f20e2b6 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_053.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e8026c20af029473ed9f0645a90436288505f8cd11818665fe087f6ae5c3719 +size 6652106 diff --git a/API_Transformers/messi/Clips_30s/messi_part_054.mp4 b/API_Transformers/messi/Clips_30s/messi_part_054.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2135f6e6432976db9656ec3314a3ff581f6b3f7e --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_054.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:801aba2da8c2fe36275ca656701e3d2f71e592181bc98a83de990a2b599f055c +size 6756080 diff --git a/API_Transformers/messi/Clips_30s/messi_part_055.mp4 b/API_Transformers/messi/Clips_30s/messi_part_055.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..02259d958a38e3c60a75dfcfa94fde775834def7 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_055.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2243a80807c1d9fbccdacbba01b4377f9690cf296a4aa84ab6073e7df6fd7e1 +size 6873850 diff --git a/API_Transformers/messi/Clips_30s/messi_part_056.mp4 b/API_Transformers/messi/Clips_30s/messi_part_056.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..0ae6629076b7ea258133d59e7837cec74f6677b7 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_056.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f61be9290b16f31ca2cbceeb651e9238c0974c3f93ffe51248ca41055e96689 +size 6976283 diff --git a/API_Transformers/messi/Clips_30s/messi_part_057.mp4 b/API_Transformers/messi/Clips_30s/messi_part_057.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..c5cd3bda5992e26d75bff34d412509e3cee7ef70 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_057.mp4 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:dd1ad0d7e18f83e34e17bfe2d3255490402a73275736e1cadd097be1cd0c525c +size 5922226 diff --git a/API_Transformers/messi/Clips_30s/messi_part_058.mp4 b/API_Transformers/messi/Clips_30s/messi_part_058.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..dfb031a10df581cca769450d245bd1ec9c60b79e --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_058.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa6ed4f673453d8c19374404a4170881921a04b2e01e95795ba12d564f624715 +size 6249258 diff --git a/API_Transformers/messi/Clips_30s/messi_part_059.mp4 b/API_Transformers/messi/Clips_30s/messi_part_059.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..0c3ea97cc1472a0a82d4f2c09e093e179afdadb9 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_059.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f191bd6aae9d8cb05527bee9664d9c6a2105c331a89596686a8fa729a0290eac +size 8019035 diff --git a/API_Transformers/messi/Clips_30s/messi_part_060.mp4 b/API_Transformers/messi/Clips_30s/messi_part_060.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b9a8d240331133206b9484377a8e5cff98e053f0 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_060.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2125df8cbb645d723739313ba0aee34c46838ee228fbddfaf2efc8a79aff375b +size 6014143 diff --git a/API_Transformers/messi/Clips_30s/messi_part_061.mp4 b/API_Transformers/messi/Clips_30s/messi_part_061.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..c98f74a45617a5ccf8bfd9000a437c17bbd7f89e --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_061.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5821b09c76e24d7e61cc8aecb77d6ec98c710752de7b79c9ec498973f37d856b +size 6164945 diff --git a/API_Transformers/messi/Clips_30s/messi_part_062.mp4 
b/API_Transformers/messi/Clips_30s/messi_part_062.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..1609ec9b71629bca793595380c384b6a4d0274b6 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_062.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b5467bc7b5e9d53b70b078f44357fccc68f30528d0886b60d0f487a031cd242 +size 6639124 diff --git a/API_Transformers/messi/Clips_30s/messi_part_063.mp4 b/API_Transformers/messi/Clips_30s/messi_part_063.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..999bab04278233d3dca270837e167084de5a61cf --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_063.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f607332bc8510f2b48ef74faeccf9a2ac84bf7202f4c607689d3d9e4316347b8 +size 7136687 diff --git a/API_Transformers/messi/Clips_30s/messi_part_064.mp4 b/API_Transformers/messi/Clips_30s/messi_part_064.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..a82ec8bf0933ade4d3e2194e45d7efe43a50d53d --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_064.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ad87383b1dac1e2d829687cb08171514cba04de3c9720c334369cadd6720d95 +size 7282599 diff --git a/API_Transformers/messi/Clips_30s/messi_part_065.mp4 b/API_Transformers/messi/Clips_30s/messi_part_065.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..6fd06d742fca68b1b19643a26828a4b3505a577e --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_065.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9db88c838b020fe4509fe7ef676c89f3ebbcf5905fef039eadfe9bbe535d2bb +size 6989261 diff --git a/API_Transformers/messi/Clips_30s/messi_part_066.mp4 b/API_Transformers/messi/Clips_30s/messi_part_066.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..250c474023e19ac256fee5c01f5417e561f3f1e5 --- /dev/null +++ 
b/API_Transformers/messi/Clips_30s/messi_part_066.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90b80d784c8098d95cbd62a2bbfe9276604da615a4865f5055b785ae5066034c +size 7460119 diff --git a/API_Transformers/messi/Clips_30s/messi_part_067.mp4 b/API_Transformers/messi/Clips_30s/messi_part_067.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..3a14ce249196c5056eeb81aab9fbfd87af1f392c --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_067.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40d171abce5d7531d3ec72d550e35e628ff604b69fb66d2a925a79aa4a2cc751 +size 4918360 diff --git a/API_Transformers/messi/Clips_30s/messi_part_068.mp4 b/API_Transformers/messi/Clips_30s/messi_part_068.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..7da648575a28bf96d2dd65cf8836d33838d18caf --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_068.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43e99ce5853428ea3d2ca93a22aabee88a0b7fe8b024662afe384a3bbf8e5287 +size 4535927 diff --git a/API_Transformers/messi/Clips_30s/messi_part_069.mp4 b/API_Transformers/messi/Clips_30s/messi_part_069.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e30bad545019ce6cc0980a3394b5de01bc9f5c2e --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_069.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e43e379b9e2ead755415d7162b67e2d99a3d8fe175c9ffd4db3ff70621fc3a +size 6677095 diff --git a/API_Transformers/messi/Clips_30s/messi_part_070.mp4 b/API_Transformers/messi/Clips_30s/messi_part_070.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..fc60740a6ef5c5f5c6000821dbbe4ae4c822a063 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_070.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e66e7863cb00c331d4bbda862883f780995863964f32b8cda29da1809cdfe80 
+size 6049486 diff --git a/API_Transformers/messi/Clips_30s/messi_part_071.mp4 b/API_Transformers/messi/Clips_30s/messi_part_071.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..7d220b6e4b9f7de7f1d9c17f0f36087a9806845f --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_071.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f18a57e1970eae61c784f080a65f99b830f08ad04194f23f130b57fe930b83ab +size 4581074 diff --git a/API_Transformers/messi/Clips_30s/messi_part_072.mp4 b/API_Transformers/messi/Clips_30s/messi_part_072.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..79dd8b34dfc5d9390e6f798fa399583319b71ac6 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_072.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a27eba1327bdad226bff7b2cbdd2e7b33a21d4edea14517be064f95ab0e6f24 +size 4958016 diff --git a/API_Transformers/messi/Clips_30s/messi_part_073.mp4 b/API_Transformers/messi/Clips_30s/messi_part_073.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b9a77e2011431c5a42b370195b979491345ee571 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_073.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf3f1baca19125d2d55c9cec98ddff3b6984bc2c809baacd58933cd44cabb253 +size 5250252 diff --git a/API_Transformers/messi/Clips_30s/messi_part_074.mp4 b/API_Transformers/messi/Clips_30s/messi_part_074.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..3fcde03d64b79edae9c23563c278dc1a9350ff2f --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_074.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d3aea1c649d26274eabcc590682ec5b520a976d292d88e3fc31f26c40ab30e4 +size 5241948 diff --git a/API_Transformers/messi/Clips_30s/messi_part_075.mp4 b/API_Transformers/messi/Clips_30s/messi_part_075.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..5fa570f767c93be5295935f258dd57ab76618e8b --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_075.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfb2cfc1082fa9ae39d2f81ea3fcaaae7decad82649ca0cdbdae8dc4865028cd +size 6141020 diff --git a/API_Transformers/messi/Clips_30s/messi_part_076.mp4 b/API_Transformers/messi/Clips_30s/messi_part_076.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..984326ed8ecb218b629ec694646f3420b6423284 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_076.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4ef1c8b6b822435f82f72c57a87a437e4b6cc7cf065a12b7cffa6913ef7bad9 +size 5133303 diff --git a/API_Transformers/messi/Clips_30s/messi_part_077.mp4 b/API_Transformers/messi/Clips_30s/messi_part_077.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..f0e56e093582c55a862665abebfd5831a17d85a6 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_077.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:277e9868e8bcc66a8cb33c3f543f3fc93c50557e90ceb49097a173ad88afae78 +size 7030017 diff --git a/API_Transformers/messi/Clips_30s/messi_part_078.mp4 b/API_Transformers/messi/Clips_30s/messi_part_078.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..fb55f348c8facfb00307f05dccd82878fc76a103 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_078.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d16d44fb88b45348515aed2a2532c6d8c9441ab80a8b523b7350a721e432b4da +size 6111048 diff --git a/API_Transformers/messi/Clips_30s/messi_part_079.mp4 b/API_Transformers/messi/Clips_30s/messi_part_079.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..7850a0b0889cba48ef970517140efb4ed4968edb --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_079.mp4 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:5f57144fd56145f338c546c8a5fa0342b6c1588643379fb046df555c0793bb04 +size 6725713 diff --git a/API_Transformers/messi/Clips_30s/messi_part_080.mp4 b/API_Transformers/messi/Clips_30s/messi_part_080.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..c13647c51bba7f76426e3a2dc16eb67d210a41ae --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_080.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed3dc9e9b82a9e07c6dc1742b2b3a33237107c3368d0c2de03b22a92f5b2dd4 +size 7982044 diff --git a/API_Transformers/messi/Clips_30s/messi_part_081.mp4 b/API_Transformers/messi/Clips_30s/messi_part_081.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..a9e571117029157005afa41ffb6fa75fa5d96f61 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_081.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c01cb391b05ae059c2c5ccd1d335e5858b23eadb00b40eb5f4b71df23cd9ac4d +size 7719716 diff --git a/API_Transformers/messi/Clips_30s/messi_part_082.mp4 b/API_Transformers/messi/Clips_30s/messi_part_082.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..7298bf5a7687f9b5cd6e3d5ef20b88853b4813e7 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_082.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42acf7bdb29befb3bc01137f5646502574140b99709e7f59b293123e09dd7e4a +size 5652081 diff --git a/API_Transformers/messi/Clips_30s/messi_part_083.mp4 b/API_Transformers/messi/Clips_30s/messi_part_083.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..c6ff6c1463f23df41b43c028644a869dab9c6cb4 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_083.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad52a6e464f84ba24f77f6b3c9ba3814cced36b7807fdb87be430f4cf8ed2d86 +size 6176717 diff --git a/API_Transformers/messi/Clips_30s/messi_part_084.mp4 
b/API_Transformers/messi/Clips_30s/messi_part_084.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..ae85ef9feddad0aa7a4c57846d52784d9a2ee330 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_084.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc00bc33795f13c8ef003d77d5089bc83be04ed5645efffa03899de90444d12f +size 8096964 diff --git a/API_Transformers/messi/Clips_30s/messi_part_085.mp4 b/API_Transformers/messi/Clips_30s/messi_part_085.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..5949cc1f727b78671b82e4bf872978f5a604c615 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_085.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae407fa5661d64d56a99756a58fe6f874219d9d4d3e3a677e04890afe1a5e506 +size 6033039 diff --git a/API_Transformers/messi/Clips_30s/messi_part_086.mp4 b/API_Transformers/messi/Clips_30s/messi_part_086.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..d7fd8b8165185964c393e85a424046db77bb42dc --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_086.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9450d1ac0b525975396a3e55676070207a81695d85e0f76f9cc81739d3a1d587 +size 7080037 diff --git a/API_Transformers/messi/Clips_30s/messi_part_087.mp4 b/API_Transformers/messi/Clips_30s/messi_part_087.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..7d755d83e499ec235c254a523fef42d137a9dc76 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_087.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b80b5430ae4cd9430724182b8d0cf0f2d1a9f0d56548ee16e865a267be16ebd2 +size 8378513 diff --git a/API_Transformers/messi/Clips_30s/messi_part_088.mp4 b/API_Transformers/messi/Clips_30s/messi_part_088.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2cbeacca8389b9769c7ea6546dfa8b80c1a818cc --- /dev/null +++ 
b/API_Transformers/messi/Clips_30s/messi_part_088.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a8e37c17a8154e4fb6dc52ca1ecf4ce87133138207bfd207cf85cc4dd164bbb +size 6592290 diff --git a/API_Transformers/messi/Clips_30s/messi_part_089.mp4 b/API_Transformers/messi/Clips_30s/messi_part_089.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..498a8145399bcee4951e025c80496b09d17aa2a2 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_089.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50236047062dc3f34339ddd5672c340114c44943f578a9ddef19049af75804d1 +size 5172512 diff --git a/API_Transformers/messi/Clips_30s/messi_part_090.mp4 b/API_Transformers/messi/Clips_30s/messi_part_090.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..acf4cf1df8738aa9d4e0672e49a888ada20374c2 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_090.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0b385e656bc85eec7aaf94f9f0ea5d052729bfc1cfe6fe39cd45d944023f28c +size 5979770 diff --git a/API_Transformers/messi/Clips_30s/messi_part_091.mp4 b/API_Transformers/messi/Clips_30s/messi_part_091.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..d9a29feb3f50874c1f0bc19db32263c5204b3a65 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_091.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f3da8751fbe4923a51ae84454a7127b52ccf026acc64b95db7d8667e2db808c +size 5259354 diff --git a/API_Transformers/messi/Clips_30s/messi_part_092.mp4 b/API_Transformers/messi/Clips_30s/messi_part_092.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..77cb70ab57dac0c175f9cb1dc9d5da9cda7b4dcc --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_092.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10ca35a443ea1477cbfb29a7fdebbbb7467c89971e2e092a3948bc4b291fdf33 
+size 3977365 diff --git a/API_Transformers/messi/Clips_30s/messi_part_093.mp4 b/API_Transformers/messi/Clips_30s/messi_part_093.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..a8811911c0df64ef26b2ecbb58dcd6eb806a6f75 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_093.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8608b308f1d0169d922e0c26e4bc1b01a5cbfc8f2fda02ee014c3014c232e57 +size 928447 diff --git a/API_Transformers/messi/Clips_30s/messi_part_094.mp4 b/API_Transformers/messi/Clips_30s/messi_part_094.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..285b5a21b857f798dc7c655b73e07784d15ab012 --- /dev/null +++ b/API_Transformers/messi/Clips_30s/messi_part_094.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7700336c48ccfe70400385aac513a67875ea2eb08e5f9dbe8a8bcf924d42e9be +size 270933 diff --git a/API_Transformers/messi/Clips_60s/messi_part_001.mp4 b/API_Transformers/messi/Clips_60s/messi_part_001.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..602262c3202af94cbfd162093bff97d2508f2698 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_001.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21ba663b82bedabbd2231ac27ae97fe2e5f79431c56d508df2716a094d68a83e +size 11150278 diff --git a/API_Transformers/messi/Clips_60s/messi_part_002.mp4 b/API_Transformers/messi/Clips_60s/messi_part_002.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..643435711f32af6c9e39b27852fb26570dc3e090 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_002.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2a7d1775d0223262e91a0ae2600de0b9e738bde065a82b5c2ab122a6deadc96 +size 14835896 diff --git a/API_Transformers/messi/Clips_60s/messi_part_003.mp4 b/API_Transformers/messi/Clips_60s/messi_part_003.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..f66115607abcc948b6d85b18ba7fb0f1d961eed9 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_003.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a709f57fe663b6048349bdcaf7206917f02fef5247ed5818bd005095f8cc491d +size 11518425 diff --git a/API_Transformers/messi/Clips_60s/messi_part_004.mp4 b/API_Transformers/messi/Clips_60s/messi_part_004.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..3e3a38fffe1f6f56fa5cceeeaf7bd4624f687012 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_004.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04bd099df34724db0551bfd380c1e2502b2f498fc4cd18d1a7af009215ba4f2c +size 14517953 diff --git a/API_Transformers/messi/Clips_60s/messi_part_005.mp4 b/API_Transformers/messi/Clips_60s/messi_part_005.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..bbd1066f5bf8ba949a77f5925144824c84a7d15f --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_005.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63f9849c23ad5ee8f3558f4f738be3b3ac9949f70dda5e7994715ef28394f701 +size 12070971 diff --git a/API_Transformers/messi/Clips_60s/messi_part_006.mp4 b/API_Transformers/messi/Clips_60s/messi_part_006.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..9d4fae044bcb32d696a569ca1b89b39fe4e6c591 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_006.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f257689bdf193fd51b885a531c803fa98e17c9c41b1cf317766ae1d617b84e4 +size 13860637 diff --git a/API_Transformers/messi/Clips_60s/messi_part_007.mp4 b/API_Transformers/messi/Clips_60s/messi_part_007.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..6ee25fa1d80eb8df76c2c0cc105a58ad4554c3e8 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_007.mp4 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:9b54dec33db937eda640a58c5260b1b1f7cae556fa19036edb4ed7aad32945e7 +size 11530049 diff --git a/API_Transformers/messi/Clips_60s/messi_part_008.mp4 b/API_Transformers/messi/Clips_60s/messi_part_008.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..170e3dedaaa770bfa4f205e2646bdc815f152024 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_008.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62487b1375d047ec9ef524f939d04b487eec7549492bdaefd1c9814336bda3a7 +size 11494749 diff --git a/API_Transformers/messi/Clips_60s/messi_part_009.mp4 b/API_Transformers/messi/Clips_60s/messi_part_009.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..d0c58106cbf4d26d59d69388c6986efe18d743b9 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_009.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1f62d2355840464058eefbd86953f5ca0929b3a07c6a8129c2176700fc7f9a5 +size 10904283 diff --git a/API_Transformers/messi/Clips_60s/messi_part_010.mp4 b/API_Transformers/messi/Clips_60s/messi_part_010.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..19e32e3fca4357bc085b1f54755cbd589b0a4972 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_010.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24305e71d8119e5ec661f2701e2e27ac8a90ac51b0aaba99e257a707abb2bcb8 +size 13145452 diff --git a/API_Transformers/messi/Clips_60s/messi_part_011.mp4 b/API_Transformers/messi/Clips_60s/messi_part_011.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..c7335d5141b78fc6df0bd803c0f502f7e36c5b24 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_011.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b8d94b4fdde980e60fc211be0070fdf68f9c5742e3da2a6d0bab77f64b350d7 +size 11613192 diff --git 
a/API_Transformers/messi/Clips_60s/messi_part_012.mp4 b/API_Transformers/messi/Clips_60s/messi_part_012.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..293c6abfbda81dee95c7f50f355d7b7f09460350 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_012.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1b343907b9b49cda3601b5f6923318f8183b511c9fe63f80e8d800d416ad188 +size 10275682 diff --git a/API_Transformers/messi/Clips_60s/messi_part_013.mp4 b/API_Transformers/messi/Clips_60s/messi_part_013.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e01919a7fb042f8d511b639e32f68a2bf006691d --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_013.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81d3d9dd5638f567964905450aaa8e699c7363d4d150ee256d3b2d6ea314b622 +size 13516244 diff --git a/API_Transformers/messi/Clips_60s/messi_part_014.mp4 b/API_Transformers/messi/Clips_60s/messi_part_014.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..6328bbfe75f90fe92570b8da566199ccd8f9fd6e --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_014.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:325b8c45675dc2d0b56e1f0e470f10e49fb7cf786427b3146cbea015de413935 +size 10707466 diff --git a/API_Transformers/messi/Clips_60s/messi_part_015.mp4 b/API_Transformers/messi/Clips_60s/messi_part_015.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..acbe82d271a8c6a9aa60dbaec5b82689b28e4a16 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_015.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7664cc919fcc91cb3ad0d8613d1b6c0435aa1613cd72c5ceb293a98ebd4c8017 +size 15139831 diff --git a/API_Transformers/messi/Clips_60s/messi_part_016.mp4 b/API_Transformers/messi/Clips_60s/messi_part_016.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..6e809f278d39c369dde07f93553ea622d7c6d3cd --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_016.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b45639b3d4b3918844288db48452d43ac0fa94cf2ef609dde0db2125c4e4d1 +size 12493549 diff --git a/API_Transformers/messi/Clips_60s/messi_part_017.mp4 b/API_Transformers/messi/Clips_60s/messi_part_017.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..50fafd78757d186f1fa4daf6c664a88992e52208 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_017.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3b44a8f43b39fa210b2c7718d7c6b97110cb97e39b18f11e545993e586d2085 +size 14112405 diff --git a/API_Transformers/messi/Clips_60s/messi_part_018.mp4 b/API_Transformers/messi/Clips_60s/messi_part_018.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..4ed83db03047cdd82285e24a1bc17b5c33002575 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_018.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7304c41b9365164c7195c74c91e6c731eb758c5d2380a61c45959100d6f52152 +size 13553377 diff --git a/API_Transformers/messi/Clips_60s/messi_part_019.mp4 b/API_Transformers/messi/Clips_60s/messi_part_019.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..811459bef26ba4733acaff8af27e326b5e2c4939 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_019.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:575d08114da6bb6194f1171986b636d833c1b88b2445e1dd44c1d15738f445ca +size 12252991 diff --git a/API_Transformers/messi/Clips_60s/messi_part_020.mp4 b/API_Transformers/messi/Clips_60s/messi_part_020.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..bba22711b5db46b4580f9a9a84d3a4159f16a4fb --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_020.mp4 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:20a23fcbc86bd52e18148dcf43adff10ee988a36e1c9ee4e900d60c27cc9151b +size 11998173 diff --git a/API_Transformers/messi/Clips_60s/messi_part_021.mp4 b/API_Transformers/messi/Clips_60s/messi_part_021.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..d28bcb972214e1e97831d356f2c5b245bd1663dd --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_021.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e00a265a24496056aec6618cf5d78681da08e96ee09d356918b1aea3846c797 +size 14223585 diff --git a/API_Transformers/messi/Clips_60s/messi_part_022.mp4 b/API_Transformers/messi/Clips_60s/messi_part_022.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..a3761ebf694f9f3ee3327acf2fbe13b943e56691 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_022.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1f232fcf1f6e222e727be84bea52c507f80993a96bc558faf8e3bd1c3fd7467 +size 13252135 diff --git a/API_Transformers/messi/Clips_60s/messi_part_023.mp4 b/API_Transformers/messi/Clips_60s/messi_part_023.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..477c96b1d43973b1169879bb886f92a320517a21 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_023.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dc1f59d15bdd9f977f407f69e1a51ac100b56f1d662faa42dcbe5e01edb5fbc +size 12065774 diff --git a/API_Transformers/messi/Clips_60s/messi_part_024.mp4 b/API_Transformers/messi/Clips_60s/messi_part_024.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..d2b224eb50a3ef1761c5d3d588d1d9e3593de4aa --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_024.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85ea1d43658f5b03dedb424c1f14978f01dc3ca5f447db0e6f059f750398875a +size 12168376 diff --git 
a/API_Transformers/messi/Clips_60s/messi_part_025.mp4 b/API_Transformers/messi/Clips_60s/messi_part_025.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..636a5e3295eca4f7ba4e0e74236858955dd6ec40 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_025.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:860a0dc05903a4d1a4230f7b9c7a5f52206ba82becf6bcb0752a87b6ca606136 +size 12656046 diff --git a/API_Transformers/messi/Clips_60s/messi_part_026.mp4 b/API_Transformers/messi/Clips_60s/messi_part_026.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2734689cf98b08cbf89493899c026382c09faa93 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_026.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6be36afa5d7253c9285a12700e46840aa75a02dbf7883bb4cc076ccea18c09a +size 12361581 diff --git a/API_Transformers/messi/Clips_60s/messi_part_027.mp4 b/API_Transformers/messi/Clips_60s/messi_part_027.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..250d286f23f4ebf2442d305d3a699218722dd7e1 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_027.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57982de073ad9f7e74d1b0b21be57b091c2ff76427f0cc7852cb99c352c4e75d +size 13388057 diff --git a/API_Transformers/messi/Clips_60s/messi_part_028.mp4 b/API_Transformers/messi/Clips_60s/messi_part_028.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..1883fc44d63d60f4f32de610a35a933e09b71b9a --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_028.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e79737b2a6ab230aca0c8b2c438a1d1cd71a865cf15dde2fc36448a66143fd8f +size 13842660 diff --git a/API_Transformers/messi/Clips_60s/messi_part_029.mp4 b/API_Transformers/messi/Clips_60s/messi_part_029.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..e3a4f154ae555b888aaed2b81e13ceb7289326b0 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_029.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e4f7c9e3cda4760de149ba2f74a3b6651d77894e2848189df1d073b913d7473 +size 12172009 diff --git a/API_Transformers/messi/Clips_60s/messi_part_030.mp4 b/API_Transformers/messi/Clips_60s/messi_part_030.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..9f9e26d9d93ff8de3cf10d0fdad7a3ce7692d802 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_030.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c78aedc80acd3c2868ba9e850b8a20ca0f11833f4611a877fd9a21495d37a0b +size 14035238 diff --git a/API_Transformers/messi/Clips_60s/messi_part_031.mp4 b/API_Transformers/messi/Clips_60s/messi_part_031.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..b2063d5b007f1a493545531277e93fcb5d55816c --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_031.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc7dd9fbba875f7023e5ba5fce5d3183ccbaabc24b97b7330a95e81524c23847 +size 12839722 diff --git a/API_Transformers/messi/Clips_60s/messi_part_032.mp4 b/API_Transformers/messi/Clips_60s/messi_part_032.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..dc70df443ec2f5bd2ad61beb0416a75a9d2989e4 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_032.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:300fc3e73022fd088b84f64083e5e9d516b408cc660f53039fa400fd029cb7f5 +size 14413709 diff --git a/API_Transformers/messi/Clips_60s/messi_part_033.mp4 b/API_Transformers/messi/Clips_60s/messi_part_033.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..c2b1c21ab755e05320b27609690eb76248dbfcb9 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_033.mp4 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:8885da1a3fdddccafa74a9d693130be96cce199d8336fc9659020d47ba5056f9 +size 14436840 diff --git a/API_Transformers/messi/Clips_60s/messi_part_034.mp4 b/API_Transformers/messi/Clips_60s/messi_part_034.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e0a7605c4fce249785ad1f3c6d5a545200a27e9e --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_034.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:872d0378aa3dbe22b1b9869ee6cdb498b53aaf9819755bd57c7a6562add5a950 +size 9449773 diff --git a/API_Transformers/messi/Clips_60s/messi_part_035.mp4 b/API_Transformers/messi/Clips_60s/messi_part_035.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..f1e61df5947dc001e1fff216a58204fc26d6b45c --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_035.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:370a7f5793660994c5a5efeff9c404bf844e36d76d192c6de0bd33d4ea9c7ae8 +size 12710289 diff --git a/API_Transformers/messi/Clips_60s/messi_part_036.mp4 b/API_Transformers/messi/Clips_60s/messi_part_036.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..3b6d7cc8666bb3577261946b2bf52ced420f4e89 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_036.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:925308808748d803745ac51e91c3a71ce978b6dc9612cb47316f93d2f820601f +size 9536802 diff --git a/API_Transformers/messi/Clips_60s/messi_part_037.mp4 b/API_Transformers/messi/Clips_60s/messi_part_037.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..3c0b1206ee6e50fe1d68af953c0ea9f2f37eafb2 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_037.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:757d8bca1643d6a7ddb840939d2b9140f824b536708fb97e3231b232f6298845 +size 10480931 diff --git 
a/API_Transformers/messi/Clips_60s/messi_part_038.mp4 b/API_Transformers/messi/Clips_60s/messi_part_038.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..2e4c9fa04af20a67304d13bc2ba1340cf7e844ad --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_038.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e567559fb37293701af2705e21c0973e56a564dcd73c70222f1a4ef40b2bdb0 +size 11294265 diff --git a/API_Transformers/messi/Clips_60s/messi_part_039.mp4 b/API_Transformers/messi/Clips_60s/messi_part_039.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..3a27bab95c32fa5f0c62bfed4432c413da663ed6 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_039.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89340cb7cdd5d2aa9ad1d007ba7eb53ec2580882bb3dc885cf0bac71c7c6d792 +size 13137451 diff --git a/API_Transformers/messi/Clips_60s/messi_part_040.mp4 b/API_Transformers/messi/Clips_60s/messi_part_040.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..91731dfb0c10d3e9f091fec9d5445f370fac931e --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_040.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6d1d8fbe295a6c1afb21688e51814770441ae3a535c3436a2d1eb09ccdc2996 +size 14692767 diff --git a/API_Transformers/messi/Clips_60s/messi_part_041.mp4 b/API_Transformers/messi/Clips_60s/messi_part_041.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..938e1e4a3d4c71c3920147f8ea4e6c97013fcb5b --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_041.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:566e739b00dfbc6d7faffa231dac0dc1385d02b52b9b1753e14786a86ba5aeeb +size 13369755 diff --git a/API_Transformers/messi/Clips_60s/messi_part_042.mp4 b/API_Transformers/messi/Clips_60s/messi_part_042.mp4 new file mode 100644 index 
0000000000000000000000000000000000000000..e40c022cb5378dbedcc093b6c05218257cba3884 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_042.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5cc026715a3c5fcaf015c9ace8f42d45650906a3fcd673f975ed3d87e489a03 +size 14284985 diff --git a/API_Transformers/messi/Clips_60s/messi_part_043.mp4 b/API_Transformers/messi/Clips_60s/messi_part_043.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..341eaf797a8f486fcaf50894cb23ba916b80e4a1 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_043.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02449cb4438a50aa64dccbe172cab18a3dc4d9b29fa3beb292498d9a4e690f7f +size 13130999 diff --git a/API_Transformers/messi/Clips_60s/messi_part_044.mp4 b/API_Transformers/messi/Clips_60s/messi_part_044.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..5d7a04bde6e3400b55dffca1eb736d3ad4b2f15f --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_044.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:048e657e39f43b04e2940919794ced32d746f37d50055657f163f59048779119 +size 14957834 diff --git a/API_Transformers/messi/Clips_60s/messi_part_045.mp4 b/API_Transformers/messi/Clips_60s/messi_part_045.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..e67dca7eeca78052d39eb667cb8bab66e8a92731 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_045.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cab7732888c23d834597af51ad984fac7791bfc994dc2957ffb374e7687a29d0 +size 11150551 diff --git a/API_Transformers/messi/Clips_60s/messi_part_046.mp4 b/API_Transformers/messi/Clips_60s/messi_part_046.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..aae127c4aebb65647e4237ade17fabb55d46c8b9 --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_046.mp4 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:fd97e5261c48da791be5a1bbc4747535c598d6788ee25f062ae6381b602b6afe +size 9236716 diff --git a/API_Transformers/messi/Clips_60s/messi_part_047.mp4 b/API_Transformers/messi/Clips_60s/messi_part_047.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..4fe35b4d9b0df7ff99eb85bfde6517b3c08d44ef --- /dev/null +++ b/API_Transformers/messi/Clips_60s/messi_part_047.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80974966f6e776001e54137e11245b90f159e0fa9f2bc36c99dfad7a974e9030 +size 1197446 diff --git a/API_Transformers/models/__pycache__/gemma.cpython-311.pyc b/API_Transformers/models/__pycache__/gemma.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..964c354c2c3499a687a96cf347bdc3286b4f7211 Binary files /dev/null and b/API_Transformers/models/__pycache__/gemma.cpython-311.pyc differ diff --git a/API_Transformers/models/__pycache__/lfm.cpython-311.pyc b/API_Transformers/models/__pycache__/lfm.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..05c99563a629df33f88736f448e5e845b0782b17 Binary files /dev/null and b/API_Transformers/models/__pycache__/lfm.cpython-311.pyc differ diff --git a/API_Transformers/models/__pycache__/minicpm.cpython-311.pyc b/API_Transformers/models/__pycache__/minicpm.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4f4255257d6760193719562a39f245d719f4e76a Binary files /dev/null and b/API_Transformers/models/__pycache__/minicpm.cpython-311.pyc differ diff --git a/API_Transformers/models/__pycache__/qwen.cpython-311.pyc b/API_Transformers/models/__pycache__/qwen.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3a3012e05c0b3ba162d465653692a533e497350f Binary files /dev/null and b/API_Transformers/models/__pycache__/qwen.cpython-311.pyc differ diff --git a/API_Transformers/models/gemma.py b/API_Transformers/models/gemma.py new 
file mode 100644 index 0000000000000000000000000000000000000000..149d2a8d338d7542fee88b1e00a77f7c42972e8a --- /dev/null +++ b/API_Transformers/models/gemma.py @@ -0,0 +1,106 @@ +# pip install accelerate + +from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer +from PIL import Image +import requests +import torch +from threading import Thread +import logging +import time +import pynvml + +class Gemma: + def __init__(self, model_id): + self.model_id = model_id + self.model = Gemma3ForConditionalGeneration.from_pretrained( + model_id, device_map="auto", torch_dtype=torch.bfloat16 + ).eval() + self.processor = AutoProcessor.from_pretrained(model_id) + + self.handle = None + if torch.cuda.is_available(): + try: + pynvml.nvmlInit() + device_id = next(self.model.parameters()).device.index + self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_id) + except Exception as e: + logging.error(f"Failed to initialize NVML: {e}") + + def __del__(self): + if self.handle: + try: + pynvml.nvmlShutdown() + except: + pass + + def generate(self, video, prompt): + start_time = time.time() + + messages = [ + { + "role": "system", + "content": [{"type": "text", "text": "You are a helpful assistant."}] + }, + + { + "role": "user", + "content": [ + {"type": "text", "text": prompt}] + } + ] + + for image in video: + messages[1]["content"].append({"type": "image", "image": image}) + + print(messages) + inputs = self.processor.apply_chat_template( + messages, add_generation_prompt=True, tokenize=True, + return_dict=True, return_tensors="pt" + ).to(self.model.device) + + logging.info(f"Prompt token length: {len(inputs.input_ids[0])}") + + streamer = TextIteratorStreamer(self.processor, skip_prompt=True, skip_special_tokens=True) + + generation_kwargs = dict( + **inputs, + streamer=streamer, + max_new_tokens=512 + ) + + thread = Thread(target=self.model.generate, kwargs=generation_kwargs) + thread.start() + + full_response = "" + print("Response: ", 
end="") + first_token_time = None + for new_text in streamer: + if first_token_time is None: + first_token_time = time.time() + full_response += new_text + print(new_text, end="", flush=True) + print() + thread.join() + + end_time = time.time() + + if first_token_time is not None: + generation_time = end_time - first_token_time + else: + generation_time = 0 + + num_generated_tokens = len(self.processor.tokenizer(full_response).input_ids) + tokens_per_second = num_generated_tokens / generation_time if generation_time > 0 else 0 + + peak_memory_mb = 0 + if self.handle: + mem_info = pynvml.nvmlDeviceGetMemoryInfo(self.handle) + peak_memory_mb = mem_info.used / (1024 * 1024) + + return { + "response": full_response, + "tokens_per_second": tokens_per_second, + "peak_gpu_memory_mb": peak_memory_mb, + "num_generated_tokens": num_generated_tokens, + } + diff --git a/API_Transformers/models/lfm.py b/API_Transformers/models/lfm.py new file mode 100644 index 0000000000000000000000000000000000000000..23a12e3c8d77c1a5c11eb0acc33320076159bfdf --- /dev/null +++ b/API_Transformers/models/lfm.py @@ -0,0 +1,104 @@ +from transformers import AutoProcessor, AutoModelForImageTextToText, TextIteratorStreamer +from transformers.image_utils import load_image +from threading import Thread +import logging +import torch +import time +import pynvml + +class LFM2: + def __init__(self, model_id): + self.model_id = model_id + self.model = AutoModelForImageTextToText.from_pretrained( + model_id, + device_map="auto", + torch_dtype=torch.bfloat16, + trust_remote_code=True + ) + self.processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True) + + self.handle = None + if torch.cuda.is_available(): + try: + pynvml.nvmlInit() + device_id = next(self.model.parameters()).device.index + self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_id) + except Exception as e: + logging.error(f"Failed to initialize NVML: {e}") + + def __del__(self): + if self.handle: + try: + pynvml.nvmlShutdown() 
+ except: + pass + + def generate(self, video, prompt): + start_time = time.time() + conversation = [ + { + "role": "user", + "content": [ + {"type": "text", "text": prompt}, + ], + }, + ] + # Assume video is a list of image paths + images = [load_image(image_path) for image_path in video] + for image in images: + conversation[0]["content"].append({"type": "image", "image": image}) + + # Generate Answer + inputs = self.processor.apply_chat_template( + conversation, + add_generation_prompt=True, + return_tensors="pt", + return_dict=True, + tokenize=True, + ).to(self.model.device) + + logging.info(f"Prompt token length: {len(inputs.input_ids[0])}") + streamer = TextIteratorStreamer(self.processor, skip_prompt=True, skip_special_tokens=True) + + generation_kwargs = dict( + **inputs, + streamer=streamer, + max_new_tokens=512 + ) + + thread = Thread(target=self.model.generate, kwargs=generation_kwargs) + thread.start() + + full_response = "" + print("Response: ", end="") + first_token_time = None + for new_text in streamer: + if first_token_time is None: + first_token_time = time.time() + full_response += new_text + print(new_text, end="", flush=True) + print() + thread.join() + + end_time = time.time() + + if first_token_time is not None: + generation_time = end_time - first_token_time + else: + generation_time = 0 + + num_generated_tokens = len(self.processor.tokenizer(full_response).input_ids) + tokens_per_second = num_generated_tokens / generation_time if generation_time > 0 else 0 + + peak_memory_mb = 0 + if self.handle: + mem_info = pynvml.nvmlDeviceGetMemoryInfo(self.handle) + peak_memory_mb = mem_info.used / (1024 * 1024) + + return { + "response": full_response, + "tokens_per_second": tokens_per_second, + "peak_gpu_memory_mb": peak_memory_mb, + "num_generated_tokens": num_generated_tokens, + } + diff --git a/API_Transformers/models/minicpm.py b/API_Transformers/models/minicpm.py new file mode 100644 index 
0000000000000000000000000000000000000000..776048870f73776b0546749c4a976ef9fc12e40e --- /dev/null +++ b/API_Transformers/models/minicpm.py @@ -0,0 +1,85 @@ +from PIL import Image +import torch +from transformers import AutoModel, AutoTokenizer, TextIteratorStreamer +from transformers.image_utils import load_image +from threading import Thread +import logging +import time +import pynvml + +class MiniCPM: + def __init__(self, model_id): + self.model_id = model_id + self.model = AutoModel.from_pretrained( + model_id, + trust_remote_code=True, + attn_implementation='sdpa', + torch_dtype=torch.bfloat16 + ) + self.model = self.model.eval().cuda() + self.tokenizer = AutoTokenizer.from_pretrained( + model_id, trust_remote_code=True + ) + + self.handle = None + if torch.cuda.is_available(): + try: + pynvml.nvmlInit() + device_id = next(self.model.parameters()).device.index + self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_id) + except Exception as e: + logging.error(f"Failed to initialize NVML: {e}") + + def __del__(self): + if self.handle: + try: + pynvml.nvmlShutdown() + except: + pass + + def generate(self, video, prompt): + start_time = time.time() + + images = [Image.open(frame).convert('RGB') for frame in video] + content = images + [prompt] + msgs = [{'role': 'user', 'content': content}] + + # MiniCPM's chat method handles streaming internally + res = self.model.chat( + image=None, + msgs=msgs, + tokenizer=self.tokenizer, + stream=True + ) + + full_response = "" + print("Response: ", end="") + first_token_time = None + for new_text in res: + if first_token_time is None: + first_token_time = time.time() + full_response += new_text + print(new_text, end="", flush=True) + print() + + end_time = time.time() + + if first_token_time is not None: + generation_time = end_time - first_token_time + else: + generation_time = 0 + + num_generated_tokens = len(self.tokenizer(full_response).input_ids) + tokens_per_second = num_generated_tokens / generation_time if 
generation_time > 0 else 0 + + peak_memory_mb = 0 + if self.handle: + mem_info = pynvml.nvmlDeviceGetMemoryInfo(self.handle) + peak_memory_mb = mem_info.used / (1024 * 1024) + + return { + "response": full_response, + "tokens_per_second": tokens_per_second, + "peak_gpu_memory_mb": peak_memory_mb, + "num_generated_tokens": num_generated_tokens, + } diff --git a/API_Transformers/models/qwen.py b/API_Transformers/models/qwen.py new file mode 100644 index 0000000000000000000000000000000000000000..a0e12cac2c55a1d22e460316d7f5cff3416a61a1 --- /dev/null +++ b/API_Transformers/models/qwen.py @@ -0,0 +1,115 @@ +from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor, Qwen2_5_VLForConditionalGeneration +from qwen_vl_utils import process_vision_info +from transformers import TextIteratorStreamer +from threading import Thread +import logging +import torch +import time +import pynvml + +class Qwen2VL: + def __init__(self, model_id): + self.model_id = model_id + if "2.5" in model_id: + self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + model_id, torch_dtype="float16", device_map="auto" + ) + else: + self.model = Qwen2VLForConditionalGeneration.from_pretrained( + model_id, torch_dtype="float16", device_map="auto" + ) + self.processor = AutoProcessor.from_pretrained(model_id) + + self.handle = None + if torch.cuda.is_available(): + try: + pynvml.nvmlInit() + device_id = next(self.model.parameters()).device.index + self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_id) + except Exception as e: + logging.error(f"Failed to initialize NVML: {e}") + + def __del__(self): + if hasattr(self, 'handle') and self.handle: + try: + pynvml.nvmlShutdown() + except: + pass + + def generate(self, video, prompt): + + start_time = time.time() + + # Preparation for inference + video_paths = [f"file://{path}" for path in video] + messages = [ + { + "role": "user", + "content": [ + { + "type": "video", + "video": video_paths, + "resized_height": 280, + 
"resized_width": 420, + }, + {"type": "text", "text": prompt}, + ], + } + ] + text = self.processor.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + + image_inputs, video_inputs = process_vision_info(messages) + inputs = self.processor( + text=[text], + images=image_inputs, + videos=video_inputs, + padding=True, + return_tensors="pt", + ) + inputs = inputs.to("cuda") + logging.info(f"Prompt token length: {len(inputs.input_ids[0])}") + streamer = TextIteratorStreamer(self.processor, skip_prompt=True, skip_special_tokens=True) + + generation_kwargs = dict( + **inputs, + streamer=streamer, + max_new_tokens=256 + ) + + thread = Thread(target=self.model.generate, kwargs=generation_kwargs) + thread.start() + full_response = "" + print("Response: ", end="") + first_token_time = None + for new_text in streamer: + if first_token_time is None: + first_token_time = time.time() + full_response += new_text + print(new_text, end="", flush=True) + print() + thread.join() + + end_time = time.time() + + if first_token_time is not None: + generation_time = end_time - first_token_time + else: + generation_time = 0 + + num_generated_tokens = len(self.processor.tokenizer(full_response).input_ids) + tokens_per_second = num_generated_tokens / generation_time if generation_time > 0 else 0 + + peak_memory_mb = 0 + if self.handle: + mem_info = pynvml.nvmlDeviceGetMemoryInfo(self.handle) + peak_memory_mb = mem_info.used / (1024 * 1024) + + return { + "response": full_response, + "tokens_per_second": tokens_per_second, + "peak_gpu_memory_mb": peak_memory_mb, + "num_generated_tokens": num_generated_tokens, + } + diff --git a/API_Transformers/output_0821/lfm/20250820_232004/Clips_60s.json b/API_Transformers/output_0821/lfm/20250820_232004/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..9dd19408fd28570d1f9261217025b1c8a0c6eb7d --- /dev/null +++ b/API_Transformers/output_0821/lfm/20250820_232004/Clips_60s.json @@ -0,0 +1,32 @@ 
+{ + "sample_part_001.mp4": { + "response": "This image captures a bustling scene from a security camera inside a store, providing a bird's eye view of the activity below. The camera, labeled \"Camera 94,\" is positioned to the right of the frame, offering a clear perspective of the store's layout.\n\nThe store appears to be well-stocked with a variety of items displayed on shelves and racks. The shelves are filled with an assortment of products, while the racks are stocked with a diverse range of goods. The store's interior is characterized by a blue wall on the right side of the image, which adds a pop of color to the scene.\n\nIn the foreground, a person is visible, likely a store employee or customer. They are dressed in a blue shirt, which stands out against the colorful background of the store. Their presence adds a human element to the scene, providing a sense of scale and activity.\n\nOverall, this image provides a detailed and dynamic view of a store's interior, showcasing the variety of products on display and the bustling activity within.", + "tokens_per_second": 43.20754976197641, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 217, + "inference_time": 24.827145099639893, + "cpu_usage": 38.8, + "cpu_core_utilization": [ + 20.3, + 39.8, + 19.5, + 75.4 + ], + "request_time": 49.633641958236694 + }, + "sample_part_002.mp4": { + "response": "This image is a surveillance camera view from a security camera, captured on March 5, 2025, at 12:41:21 PM. The camera is positioned in a store, providing a top-down view of the scene. The store appears to be a retail environment, possibly a department store or a specialty shop.\n\nIn the image, there are several people visible. Some of them are wearing blue shirts, which could indicate they are store employees or part of a uniform. Others are wearing gray shirts, which might suggest they are customers or staff members. 
The exact number of people is not clear from the image, but there seems to be a group of individuals engaged in various activities.\n\nThe store itself is well-lit, with a variety of products on display. The products are arranged in rows, and there are shelves and racks filled with items. The exact nature of the products is not clear from the image, but they could be anything from clothing and accessories to electronics and household goods.\n\nThe store's interior is not fully visible, but it appears to be spacious and organized. The floor is clean, and there are no visible obstructions or clutter. The overall atmosphere of the store seems calm and orderly.\n\nThe image does not provide any additional context or information about the store's location, the specific products being sold, or the store's name. However, the surveillance camera view offers a glimpse into the store's environment and the activities of its patrons.", + "tokens_per_second": 43.03845068270847, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 304, + "inference_time": 29.707706451416016, + "cpu_usage": 53.7, + "cpu_core_utilization": [ + 39.4, + 55.5, + 81.5, + 38.5 + ], + "request_time": 57.447105169296265 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/lfm/20250820_232336/Clips_60s.json b/API_Transformers/output_0821/lfm/20250820_232336/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..9b9f79c43dde25f24793d3f380a1a0bf2d1be7c9 --- /dev/null +++ b/API_Transformers/output_0821/lfm/20250820_232336/Clips_60s.json @@ -0,0 +1,17 @@ +{ + "sample_part_001.mp4": { + "response": "This image captures a bustling scene from a security camera inside a store, providing a bird's eye view of the activity below. The camera, labeled \"Camera 94,\" is positioned to the right of the frame, offering a clear perspective of the store's layout.\n\nThe store appears to be well-stocked with a variety of items displayed on shelves and racks. 
The shelves are filled with an assortment of products, while the racks are stocked with a diverse range of goods. The store's interior is characterized by a blue wall on the right side of the image, which adds a pop of color to the scene.\n\nIn the foreground, a person is visible, likely a store employee or customer. They are dressed in a blue shirt, which stands out against the colorful background of the store. Their presence adds a human element to the scene, providing a sense of scale and activity.\n\nOverall, this image provides a detailed and dynamic view of a store's interior, showcasing the variety of products on display and the bustling activity within.", + "tokens_per_second": 42.609188518901775, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 217, + "inference_time": 28.07533621788025, + "cpu_usage": 61.6, + "cpu_core_utilization": [ + 56.9, + 67.6, + 54.1, + 67.9 + ], + "request_time": 28.22587299346924 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/lfm/20250820_232604/Clips_60s.json b/API_Transformers/output_0821/lfm/20250820_232604/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..e18023a7ff25bd30dd6493356d59f0c7d257b909 --- /dev/null +++ b/API_Transformers/output_0821/lfm/20250820_232604/Clips_60s.json @@ -0,0 +1,785 @@ +{ + "sample_part_001.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_001.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 48735504, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_001.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.1795644760131836 + }, + "sample_part_002.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a 
valid string", + "input": { + "filename": "sample_part_002.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 48588616, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_002.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.2166910171508789 + }, + "sample_part_003.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_003.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 48112083, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_003.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.18287372589111328 + }, + "sample_part_004.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_004.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 47430420, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_004.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.16147804260253906 + }, + "sample_part_005.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_005.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 46857634, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_005.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.13133859634399414 + }, + "sample_part_006.mp4": { + "detail": [ + 
{ + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_006.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 46788959, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_006.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.13338422775268555 + }, + "sample_part_007.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_007.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 46918203, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_007.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.13173580169677734 + }, + "sample_part_008.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_008.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 48033592, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_008.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.14929914474487305 + }, + "sample_part_009.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_009.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 47454418, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_009.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + 
} + } + ], + "request_time": 0.13762712478637695 + }, + "sample_part_010.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_010.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 46882079, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_010.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.11983299255371094 + }, + "sample_part_011.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_011.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 46952492, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_011.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.12041950225830078 + }, + "sample_part_012.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_012.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 46288040, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_012.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.11827516555786133 + }, + "sample_part_013.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_013.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 46616584, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; 
filename=\"sample_part_013.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.11757230758666992 + }, + "sample_part_014.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_014.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 46349023, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_014.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.14239120483398438 + }, + "sample_part_015.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_015.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 46342364, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_015.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.1746082305908203 + }, + "sample_part_016.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_016.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 47180176, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_016.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.11389422416687012 + }, + "sample_part_017.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_017.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + 
}, + "size": 47445544, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_017.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.12121701240539551 + }, + "sample_part_018.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_018.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 47182253, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_018.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.1313309669494629 + }, + "sample_part_019.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_019.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 47985680, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_019.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.11887741088867188 + }, + "sample_part_020.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_020.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 47196369, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_020.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.10411238670349121 + }, + "sample_part_021.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + 
"filename": "sample_part_021.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 47282868, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_021.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.10732221603393555 + }, + "sample_part_022.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_022.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 47029914, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_022.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.10542488098144531 + }, + "sample_part_023.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_023.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 48416520, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_023.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.18142175674438477 + }, + "sample_part_024.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_024.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 48251695, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_024.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.10632991790771484 + }, + "sample_part_025.mp4": { + "detail": [ + { + "type": "string_type", + 
"loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_025.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 47354589, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_025.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.1016244888305664 + }, + "sample_part_026.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_026.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 47911002, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_026.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.10572242736816406 + }, + "sample_part_027.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_027.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 47153446, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_027.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.10569357872009277 + }, + "sample_part_028.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_028.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 47400547, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_028.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 
0.10515618324279785 + }, + "sample_part_029.mp4": { + "detail": [ + { + "type": "string_type", + "loc": [ + "body", + "video_file" + ], + "msg": "Input should be a valid string", + "input": { + "filename": "sample_part_029.mp4", + "file": { + "_file": {}, + "_max_size": 1048576, + "_rolled": true + }, + "size": 47312950, + "headers": { + "content-disposition": "form-data; name=\"video_file\"; filename=\"sample_part_029.mp4\"", + "content-type": "video/mp4" + }, + "_max_mem_size": 1048576 + } + } + ], + "request_time": 0.10537385940551758 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/lfm/20250820_232655/Clips_60s.json b/API_Transformers/output_0821/lfm/20250820_232655/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..7c268fcf79f3df60dd2a8b994257f461797a6712 --- /dev/null +++ b/API_Transformers/output_0821/lfm/20250820_232655/Clips_60s.json @@ -0,0 +1,122 @@ +{ + "sample_part_001.mp4": { + "response": "This image captures a bustling scene from a security camera inside a store, providing a bird's eye view of the activity below. The camera, labeled \"Camera 94,\" is positioned to the right of the frame, offering a clear perspective of the store's layout.\n\nThe store appears to be well-stocked with a variety of items displayed on shelves and racks. The shelves are filled with an assortment of products, while the racks are stocked with a diverse range of goods. The store's interior is characterized by a blue wall on the right side of the image, which adds a pop of color to the scene.\n\nIn the foreground, a person is visible, likely a store employee or customer. They are dressed in a blue shirt, which stands out against the colorful background of the store. 
Their presence adds a human element to the scene, providing a sense of scale and activity.\n\nOverall, this image provides a detailed and dynamic view of a store's interior, showcasing the variety of products on display and the bustling activity within.", + "tokens_per_second": 43.03910315479847, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 217, + "inference_time": 25.01309299468994, + "cpu_usage": 40.1, + "cpu_core_utilization": [ + 36.1, + 41.1, + 37.2, + 46.1 + ], + "request_time": 25.02692174911499 + }, + "sample_part_002.mp4": { + "response": "This image is a surveillance camera view from a security camera, captured on March 5, 2025, at 12:41:21 PM. The camera is positioned in a store, providing a top-down view of the scene. The store appears to be a retail environment, possibly a department store or a specialty shop.\n\nIn the image, there are several people visible. Some of them are wearing blue shirts, which could indicate they are store employees or part of a uniform. Others are wearing gray shirts, which might suggest they are customers or staff members. The exact number of people is not clear from the image, but there seems to be a group of individuals engaged in various activities.\n\nThe store itself is well-lit, with a variety of products on display. The products are arranged in rows, and there are shelves and racks filled with items. The exact nature of the products is not clear from the image, but they could be anything from clothing and accessories to electronics and household goods.\n\nThe store's interior is not fully visible, but it appears to be spacious and organized. The floor is clean, and there are no visible obstructions or clutter. The overall atmosphere of the store seems calm and orderly.\n\nThe image does not provide any additional context or information about the store's location, the specific products being sold, or the store's name. 
However, the surveillance camera view offers a glimpse into the store's environment and the activities of its patrons.", + "tokens_per_second": 42.95401647014546, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 304, + "inference_time": 27.033696174621582, + "cpu_usage": 39.6, + "cpu_core_utilization": [ + 61.4, + 39.9, + 35.3, + 21.8 + ], + "request_time": 27.048319101333618 + }, + "sample_part_003.mp4": { + "response": "This image is a security camera view of a store checkout lane. The camera is positioned above the checkout counter, looking down at the merchandise on display and the customers waiting in line. The store appears to be well-stocked with a variety of items, including toys, clothing, and electronics. The checkout lane is currently empty, with no customers in sight. The image is captured from a high angle, providing a clear view of the entire checkout area.", + "tokens_per_second": 43.51874005006489, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 92, + "inference_time": 22.018213748931885, + "cpu_usage": 39.8, + "cpu_core_utilization": [ + 81.6, + 22.0, + 31.3, + 24.0 + ], + "request_time": 22.03469491004944 + }, + "sample_part_004.mp4": { + "response": "This image captures a bustling scene from a store's checkout area, viewed from an overhead perspective. The store is filled with a variety of items, including clothing and accessories, which are neatly arranged on shelves and racks. A customer is seen at the counter, likely in the process of making a purchase. The store's interior is well-lit, with a large blue mural on the wall adding a vibrant touch to the atmosphere. 
The image is timestamped \"03-05-2025 Wed 02:23:24\" and labeled \"Camera 94,\" indicating the source and time of the photograph.", + "tokens_per_second": 42.80125306128213, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 125, + "inference_time": 22.932126998901367, + "cpu_usage": 39.8, + "cpu_core_utilization": [ + 19.9, + 21.9, + 21.1, + 96.1 + ], + "request_time": 22.946391105651855 + }, + "sample_part_005.mp4": { + "response": "This image is a surveillance camera view, captured on Wednesday, May 24, 2024, at 23:49:24. The scene is set in a store, with a variety of items displayed on shelves and racks. The camera is positioned above the store, providing a bird's-eye view of the entire area.\n\nIn the foreground, there is a person wearing a blue shirt, who appears to be a store employee. They are standing near a counter, which is stocked with various items. The counter is located on the left side of the image, and it is surrounded by shelves and racks filled with merchandise.\n\nThe store itself has a blue wall on the right side of the image, which adds a pop of color to the scene. The floor of the store is covered in tiles, which are visible in the bottom left corner of the image.\n\nOverall, this image provides a comprehensive view of the store's layout and the items on display. It gives the impression of a well-organized and well-stocked retail environment.", + "tokens_per_second": 42.835329663650555, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 216, + "inference_time": 24.902441263198853, + "cpu_usage": 38.4, + "cpu_core_utilization": [ + 21.2, + 37.7, + 18.6, + 76.1 + ], + "request_time": 24.91689682006836 + }, + "sample_part_006.mp4": { + "response": "This image is a security camera view of a store interior, captured on Wednesday, May 24, 2025, at 12:24:41 PM. The camera, labeled \"Camera 94,\" is positioned in the upper right corner of the frame. 
The store appears to be a retail space, with a variety of items displayed on shelves and racks.\n\nIn the center of the image, there is a person wearing a blue shirt, who seems to be a store employee. They are standing in front of a shelf that is stocked with various items, including what appear to be toys or collectibles. The person is facing towards the camera, suggesting they may be interacting with customers or overseeing the display.\n\nThe store's interior features a tiled floor and a blue wall with a mural depicting a cityscape. The overall atmosphere of the store seems organized and well-lit, with the security camera providing a clear view of the activities within.", + "tokens_per_second": 43.349566710658124, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 195, + "inference_time": 24.438754558563232, + "cpu_usage": 39.0, + "cpu_core_utilization": [ + 45.2, + 19.7, + 70.7, + 20.1 + ], + "request_time": 24.453261852264404 + }, + "sample_part_007.mp4": { + "response": "This image is a surveillance camera view of a store checkout lane. The camera is positioned above the checkout lane, looking down at the customers and the items on the conveyor belt. The customers are standing in line, waiting to pay for their purchases. The items on the conveyor belt are various products, including toys and electronics. The store appears to be well-lit and clean. The image is in color and is taken during the day.", + "tokens_per_second": 43.652183023369325, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 90, + "inference_time": 21.98921012878418, + "cpu_usage": 39.8, + "cpu_core_utilization": [ + 80.7, + 22.9, + 34.6, + 21.1 + ], + "request_time": 22.00344204902649 + }, + "sample_part_008.mp4": { + "response": "This image is a security camera footage captured from a store's interior. The camera is positioned above the store's entrance, providing a wide-angle view of the entire store. 
The footage is timestamped with the date \"05-05-2025\" and the time \"22:21:04\".\n\nThe store appears to be well-stocked with various items on display. There are shelves filled with products, and a counter is visible with a cash register and a person standing behind it. The store's interior is brightly lit, and the walls are painted in a calming shade of blue.\n\nThe footage captures a moment of activity, with several people present in the store. Some are browsing the products on display, while others are engaged in conversation or waiting in line. The overall atmosphere of the store seems to be busy and bustling, typical of a retail environment.", + "tokens_per_second": 42.780439620939916, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 179, + "inference_time": 24.212013959884644, + "cpu_usage": 41.1, + "cpu_core_utilization": [ + 62.1, + 38.6, + 41.8, + 21.9 + ], + "request_time": 24.22656798362732 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/lfm/20250820_233224/Clips_60s.json b/API_Transformers/output_0821/lfm/20250820_233224/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..6402686f714079f0d160cc3082b8d3984e4c9fd8 --- /dev/null +++ b/API_Transformers/output_0821/lfm/20250820_233224/Clips_60s.json @@ -0,0 +1,62 @@ +{ + "sample_part_001.mp4": { + "response": "This image shows a security camera view of a convenience store. The camera is positioned above the checkout counter, looking down at the shelves stocked with various items. A cashier is standing behind the counter, and there are several customers visible in the store. The store appears to be well-lit and organized, with a blue mural on the wall adding a decorative touch. 
The image is timestamped \"03-05-2025 Wed 24:24:20\" and labeled \"Camera 94\".", + "tokens_per_second": 43.735840716197266, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 102, + "inference_time": 22.10017204284668, + "cpu_usage": 8.4, + "cpu_core_utilization": [ + 9.6, + 5.9, + 10.7, + 7.5 + ], + "request_time": 22.113993883132935 + }, + "sample_part_002.mp4": { + "response": "This image shows a security camera view of a convenience store. The camera is positioned above the checkout counter, capturing the scene from above. The store appears to be well-stocked with various items on display, including what looks like a display of skateboards. There are several people visible in the image, including store employees and customers. The store has a blue wall on the right side and a white wall on the left side. The camera is labeled \"Camera 94\" in the bottom right corner.", + "tokens_per_second": 43.61585189482321, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 103, + "inference_time": 22.203722953796387, + "cpu_usage": 41.2, + "cpu_core_utilization": [ + 40.5, + 22.9, + 75.8, + 25.4 + ], + "request_time": 22.21777629852295 + }, + "sample_part_003.mp4": { + "response": "The video shows a convenience store with a checkout counter and shelves stocked with various items. A person wearing a blue shirt is working at the checkout counter, while another person is standing behind the counter. The shelves are filled with items such as snacks, drinks, and other convenience store products. 
The video is captured from a high angle, providing a clear view of the store's layout and the people working there.", + "tokens_per_second": 43.7656086803256, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 83, + "inference_time": 21.94315481185913, + "cpu_usage": 40.5, + "cpu_core_utilization": [ + 37.9, + 46.5, + 22.3, + 55.2 + ], + "request_time": 21.957934379577637 + }, + "sample_part_004.mp4": { + "response": "This image shows a security camera view of a store checkout counter. A person wearing a blue shirt is standing behind the counter, likely a cashier. The counter is cluttered with various items, including what appear to be toys, books, and other merchandise. The store has a blue wall with a mural of a city skyline. The camera is labeled \"Camera 94\" in the corner.", + "tokens_per_second": 43.67951928934803, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 82, + "inference_time": 21.797672271728516, + "cpu_usage": 40.2, + "cpu_core_utilization": [ + 67.0, + 20.9, + 50.2, + 22.5 + ], + "request_time": 21.81200623512268 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/lfm/Clips_60s.json b/API_Transformers/output_0821/lfm/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..4b171474180d600d62fb09e23a63d8ca91f05429 --- /dev/null +++ b/API_Transformers/output_0821/lfm/Clips_60s.json @@ -0,0 +1,872 @@ +{ + "sample_part_001.mp4": { + "response": "This image captures a bustling scene from a security camera inside a store, providing a bird's eye view of the activity below. The camera, labeled \"Camera 94,\" is positioned to the right of the frame, offering a clear perspective of the store's layout.\n\nThe store appears to be well-stocked with a variety of items displayed on shelves and racks. The shelves are filled with an assortment of products, while the racks are stocked with a diverse range of goods. 
The store's interior is characterized by a blue wall on the right side of the image, which adds a pop of color to the scene.\n\nIn the foreground, a person is visible, likely a store employee or customer. They are dressed in a blue shirt, which stands out against the colorful background of the store. Their presence adds a human element to the scene, providing a sense of scale and activity.\n\nOverall, this image provides a detailed and dynamic view of a store's interior, showcasing the variety of products on display and the bustling activity within.", + "tokens_per_second": 39.56194680302735, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 217, + "inference_time": 27.07969617843628, + "cpu_usage": 80.8, + "cpu_core_utilization": [ + 82.5, + 86.9, + 82.1, + 71.6 + ], + "request_time": 55.74139952659607 + }, + "sample_part_002.mp4": { + "response": "This image is a surveillance camera view from a security camera, captured on March 5, 2025, at 12:41:21 PM. The camera is positioned in a store, providing a top-down view of the scene. The store appears to be a retail environment, possibly a department store or a specialty shop.\n\nIn the image, there are several people visible. Some of them are wearing blue shirts, which could indicate they are store employees or part of a uniform. Others are wearing gray shirts, which might suggest they are customers or staff members. The exact number of people is not clear from the image, but there seems to be a group of individuals engaged in various activities.\n\nThe store itself is well-lit, with a variety of products on display. The products are arranged in rows, and there are shelves and racks filled with items. The exact nature of the products is not clear from the image, but they could be anything from clothing and accessories to electronics and household goods.\n\nThe store's interior is not fully visible, but it appears to be spacious and organized. 
The floor is clean, and there are no visible obstructions or clutter. The overall atmosphere of the store seems calm and orderly.\n\nThe image does not provide any additional context or information about the store's location, the specific products being sold, or the store's name. However, the surveillance camera view offers a glimpse into the store's environment and the activities of its patrons.", + "tokens_per_second": 39.47068338412706, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 304, + "inference_time": 30.126835107803345, + "cpu_usage": 76.5, + "cpu_core_utilization": [ + 76.8, + 64.9, + 74.5, + 89.6 + ], + "request_time": 59.73742198944092 + }, + "sample_part_003.mp4": { + "response": "This image is a security camera view of a store checkout lane. The camera is positioned above the checkout counter, looking down at the merchandise on display and the customers waiting in line. The store appears to be well-stocked with a variety of items, including toys, clothing, and electronics. The checkout lane is currently empty, with no customers in sight. The image is captured from a high angle, providing a clear view of the entire checkout area.", + "tokens_per_second": 40.50104433917785, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 92, + "inference_time": 23.932410717010498, + "cpu_usage": 79.5, + "cpu_core_utilization": [ + 79.5, + 75.0, + 79.3, + 84.2 + ], + "request_time": 49.556724548339844 + }, + "sample_part_004.mp4": { + "response": "This image captures a bustling scene from a store's checkout area, viewed from an overhead perspective. The store is filled with a variety of items, including clothing and accessories, which are neatly arranged on shelves and racks. A customer is seen at the counter, likely in the process of making a purchase. The store's interior is well-lit, with a large blue mural on the wall adding a vibrant touch to the atmosphere. 
The image is timestamped \"03-05-2025 Wed 02:23:24\" and labeled \"Camera 94,\" indicating the source and time of the photograph.", + "tokens_per_second": 40.64016822608698, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 125, + "inference_time": 24.651363134384155, + "cpu_usage": 77.6, + "cpu_core_utilization": [ + 85.7, + 82.1, + 72.3, + 70.5 + ], + "request_time": 51.03870248794556 + }, + "sample_part_005.mp4": { + "response": "This image is a surveillance camera view, captured on Wednesday, May 24, 2024, at 23:49:24. The scene is set in a store, with a variety of items displayed on shelves and racks. The camera is positioned above the store, providing a bird's-eye view of the entire area.\n\nIn the foreground, there is a person wearing a blue shirt, who appears to be a store employee. They are standing near a counter, which is stocked with various items. The counter is located on the left side of the image, and it is surrounded by shelves and racks filled with merchandise.\n\nThe store itself has a blue wall on the right side of the image, which adds a pop of color to the scene. The floor of the store is covered in tiles, which are visible in the bottom left corner of the image.\n\nOverall, this image provides a comprehensive view of the store's layout and the items on display. It gives the impression of a well-organized and well-stocked retail environment.", + "tokens_per_second": 38.57113528231376, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 216, + "inference_time": 28.635375022888184, + "cpu_usage": 77.1, + "cpu_core_utilization": [ + 73.2, + 79.6, + 81.1, + 74.7 + ], + "request_time": 57.5153546333313 + }, + "sample_part_006.mp4": { + "response": "This image is a security camera view of a store interior, captured on Wednesday, May 24, 2025, at 12:24:41 PM. The camera, labeled \"Camera 94,\" is positioned in the upper right corner of the frame. 
The store appears to be a retail space, with a variety of items displayed on shelves and racks.\n\nIn the center of the image, there is a person wearing a blue shirt, who seems to be a store employee. They are standing in front of a shelf that is stocked with various items, including what appear to be toys or collectibles. The person is facing towards the camera, suggesting they may be interacting with customers or overseeing the display.\n\nThe store's interior features a tiled floor and a blue wall with a mural depicting a cityscape. The overall atmosphere of the store seems organized and well-lit, with the security camera providing a clear view of the activities within.", + "tokens_per_second": 40.02737695090751, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 195, + "inference_time": 27.571413040161133, + "cpu_usage": 79.0, + "cpu_core_utilization": [ + 80.2, + 69.7, + 76.0, + 90.1 + ], + "request_time": 54.48677635192871 + }, + "sample_part_007.mp4": { + "response": "This image is a surveillance camera view of a store checkout lane. The camera is positioned above the checkout lane, looking down at the customers and the items on the conveyor belt. The customers are standing in line, waiting to pay for their purchases. The items on the conveyor belt are various products, including toys and electronics. The store appears to be well-lit and clean. The image is in color and is taken during the day.", + "tokens_per_second": 40.57407084953663, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 90, + "inference_time": 25.99420714378357, + "cpu_usage": 79.6, + "cpu_core_utilization": [ + 78.2, + 63.9, + 78.7, + 97.4 + ], + "request_time": 50.88256120681763 + }, + "sample_part_008.mp4": { + "response": "This image is a security camera footage captured from a store's interior. The camera is positioned above the store's entrance, providing a wide-angle view of the entire store. 
The footage is timestamped with the date \"05-05-2025\" and the time \"22:21:04\".\n\nThe store appears to be well-stocked with various items on display. There are shelves filled with products, and a counter is visible with a cash register and a person standing behind it. The store's interior is brightly lit, and the walls are painted in a calming shade of blue.\n\nThe footage captures a moment of activity, with several people present in the store. Some are browsing the products on display, while others are engaged in conversation or waiting in line. The overall atmosphere of the store seems to be busy and bustling, typical of a retail environment.", + "tokens_per_second": 40.97108679897668, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 179, + "inference_time": 26.01269769668579, + "cpu_usage": 77.4, + "cpu_core_utilization": [ + 79.7, + 59.8, + 73.6, + 96.6 + ], + "request_time": 53.32739067077637 + }, + "sample_part_009.mp4": { + "response": "This image is a surveillance camera view of a store interior, captured on Wednesday, May 24, 2022, at 12:29:24. The camera is positioned above the checkout counter, providing a bird's-eye view of the store's layout. The store appears to be well-lit, with a blue wall on the right side and a white wall on the left.\n\nIn the foreground, there is a checkout counter with a cash register and a customer standing in front of it. The customer is wearing a blue shirt and has a yellow bag on their shoulder. Behind the counter, there is a store employee wearing a blue shirt and a black apron, who is looking down at the items on the counter.\n\nThe store appears to be selling a variety of items, including clothing, accessories, and possibly electronics. 
There are shelves and racks filled with various products, and there are also some displays of clothing and accessories.\n\nOverall, the image provides a clear view of the store's layout and the items being sold, as well as the employees working in the store.", + "tokens_per_second": 39.48549649221776, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 222, + "inference_time": 28.259138345718384, + "cpu_usage": 77.1, + "cpu_core_utilization": [ + 83.8, + 69.4, + 66.9, + 88.1 + ], + "request_time": 57.93466758728027 + }, + "sample_part_010.mp4": { + "response": "This image captures a bustling scene from a security camera positioned above a store checkout counter. The camera, labeled \"Camera 94,\" provides a bird's-eye view of the area, revealing a variety of objects and people.\n\nThe checkout counter is the focal point of the image, with a person standing behind it, presumably a cashier. The counter is cluttered with various items, including a blue tarp, a yellow and blue box, and a white box.\n\nIn the background, there's a blue wall adorned with a mural depicting a city skyline. The mural adds a touch of urban charm to the scene.\n\nThe image is timestamped \"05-05-2025 Wed 02:29:24,\" indicating that it was captured at 2:29 AM on May 5th, 2025.\n\nOverall, this image provides a unique perspective on a typical shopping day, showcasing the hustle and bustle of a busy store checkout counter.", + "tokens_per_second": 40.48125592448077, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 205, + "inference_time": 28.071035146713257, + "cpu_usage": 76.5, + "cpu_core_utilization": [ + 77.9, + 58.7, + 76.8, + 92.3 + ], + "request_time": 56.648736238479614 + }, + "sample_part_011.mp4": { + "response": "This image captures a bustling scene from a security camera positioned above a store checkout counter. The camera, labeled \"Camera 94,\" provides a bird's-eye view of the area, revealing a long, narrow aisle filled with a variety of items. 
The aisle is lined with shelves stocked with merchandise, and a checkout counter is visible in the foreground.\n\nThe most striking feature of the image is the crowd of people gathered around the checkout counter. They are all wearing blue shirts, creating a sea of blue that dominates the scene. The exact number of people is difficult to determine due to the angle of the image, but it appears to be a significant number.\n\nThe crowd seems to be engaged in various activities. Some individuals appear to be in line, possibly waiting to check out or to receive their purchases. Others may be browsing the items on display or simply passing through the area.\n\nThe overall atmosphere of the image is one of organized chaos, typical of a busy retail environment. The crowd of blue-shirted individuals adds a unique visual element to the scene, creating a sense of unity and shared purpose among the group.", + "tokens_per_second": 38.9287779528858, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 230, + "inference_time": 27.47854495048523, + "cpu_usage": 81.7, + "cpu_core_utilization": [ + 80.4, + 82.5, + 84.0, + 80.1 + ], + "request_time": 56.67331099510193 + }, + "sample_part_012.mp4": { + "response": "This image captures a bustling scene from a security camera positioned above a store checkout counter. The camera, labeled \"Camera 94,\" provides a bird's-eye view of the area, revealing a variety of objects and people.\n\nThe checkout counter itself is a hive of activity. A cashier, dressed in a blue shirt, stands behind the counter, attending to customers. The counter is cluttered with various items, including a yellow and blue backpack, a blue and yellow suitcase, and a black and white sign.\n\nIn the background, several people can be seen, some of whom are likely customers. 
The store appears to be well-stocked, with a wide range of products on display.\n\nThe image is timestamped \"05-05-2025 Wed 04:31:14,\" indicating that it was captured on May 5th, 2025, at 4:31:14 PM.\n\nOverall, this image provides a detailed snapshot of a typical day in a busy store, as seen from the perspective of a security camera.", + "tokens_per_second": 39.63977281870722, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 218, + "inference_time": 27.233951807022095, + "cpu_usage": 76.7, + "cpu_core_utilization": [ + 91.7, + 76.0, + 62.5, + 76.4 + ], + "request_time": 55.68544602394104 + }, + "sample_part_013.mp4": { + "response": "This image captures a bustling scene from a security camera positioned above a store checkout counter. The camera, labeled \"Camera 94,\" provides a bird's-eye view of the area, revealing a long checkout line filled with customers. The store's interior is visible, showcasing a blue wall adorned with a mural depicting a cityscape.\n\nAt the forefront of the image, a man wearing a blue shirt is seen bending over the checkout counter, seemingly attending to a customer's issue. The counter itself is cluttered with various items, including a yellow and blue box, a white box, and a black box.\n\nThe store's floor is covered in gray tiles, and the ceiling is painted white, creating a bright and clean atmosphere. The overall scene conveys a sense of activity and commerce, with the security camera serving as an unobtrusive observer of the store's daily operations.", + "tokens_per_second": 38.80665454390215, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 184, + "inference_time": 27.219250679016113, + "cpu_usage": 81.7, + "cpu_core_utilization": [ + 82.7, + 73.3, + 79.5, + 91.2 + ], + "request_time": 54.520111083984375 + }, + "sample_part_014.mp4": { + "response": "This is a surveillance camera view of a store aisle. The camera is positioned above the store, looking down at the merchandise on the shelves. 
The aisle is filled with various items, including clothing and accessories. There is a person standing at the end of the aisle, possibly a customer or store employee. The camera is labeled \"Camera 94\" in the corner of the image.", + "tokens_per_second": 37.28115717917145, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 77, + "inference_time": 24.268096923828125, + "cpu_usage": 78.4, + "cpu_core_utilization": [ + 75.0, + 67.1, + 82.7, + 88.7 + ], + "request_time": 48.29588508605957 + }, + "sample_part_015.mp4": { + "response": "This image captures a bustling scene from a security camera positioned above a store checkout counter. The camera, labeled \"Camera 94,\" provides a bird's-eye view of the area, revealing a variety of objects and people.\n\nThe checkout counter is the focal point of the image, with a cashier diligently attending to customers. Behind the counter, a display case showcases an array of colorful items, including yellow and blue bags, as well as a selection of clothing.\n\nThe store is filled with shoppers, their faces blurred by the security camera's lens. They move about the store, some pushing carts filled with merchandise, while others stand in line at the checkout counter.\n\nThe image is taken from a high angle, offering a comprehensive view of the store's layout and activity. The camera's perspective allows for a clear view of the checkout counter, the display case, and the bustling store environment.\n\nOverall, this image provides a detailed snapshot of a typical day in a busy store, as observed by a security camera.", + "tokens_per_second": 39.45218673810312, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 215, + "inference_time": 28.369194269180298, + "cpu_usage": 77.8, + "cpu_core_utilization": [ + 72.5, + 73.4, + 81.9, + 83.0 + ], + "request_time": 55.91564965248108 + }, + "sample_part_016.mp4": { + "response": "This image is a security camera view of a store's checkout lane. 
The camera is positioned above the checkout counter, capturing the scene from a high angle. The store appears to be well-lit, with a blue wall on the right side and a white wall on the left. The checkout lane is occupied by customers, with one person visible behind the counter. The counter itself is cluttered with various items, including a yellow and blue box, a red and white box, and a black and white box. The image is timestamped \"05-05-2025 Wed 04:35:24\" and labeled \"Camera 94.\"", + "tokens_per_second": 39.37396963384792, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 131, + "inference_time": 26.177982091903687, + "cpu_usage": 77.8, + "cpu_core_utilization": [ + 76.8, + 70.1, + 78.0, + 86.3 + ], + "request_time": 52.8818781375885 + }, + "sample_part_017.mp4": { + "response": "This image is a security camera view of a store checkout lane. The camera is positioned above the checkout counter, looking down at the customers and the items on the conveyor belt. The store appears to be well-lit and organized, with a blue wall on the right side and a white wall on the left side. The checkout lane is occupied by several customers, with one person standing at the counter and others waiting in line. The items on the conveyor belt include various products, such as toys, electronics, and clothing. The camera is capturing the scene from a high angle, providing a clear view of the entire checkout lane.", + "tokens_per_second": 38.884255600661035, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 128, + "inference_time": 24.90197491645813, + "cpu_usage": 83.2, + "cpu_core_utilization": [ + 85.3, + 81.4, + 80.7, + 85.5 + ], + "request_time": 50.27590870857239 + }, + "sample_part_018.mp4": { + "response": "This image is a surveillance camera view of a store's checkout lane. The camera is positioned above the counter, looking down at the customers and the items on the conveyor belt. 
The store appears to be well-lit and organized, with a blue wall on the right side and a white wall on the left side. The conveyor belt is filled with various items, including toys, books, and other merchandise. There are several customers in the image, all of whom are wearing blue shirts and hats. The customers are standing in line at the checkout lane, waiting to pay for their purchases. The image is clear and detailed, allowing for easy identification of the customers and the items on the conveyor belt.", + "tokens_per_second": 38.10626565587464, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 144, + "inference_time": 26.757917404174805, + "cpu_usage": 77.1, + "cpu_core_utilization": [ + 69.9, + 75.1, + 84.7, + 78.9 + ], + "request_time": 52.53176736831665 + }, + "sample_part_019.mp4": { + "response": "This image is a security camera view of a store's checkout lane. The camera is positioned above the checkout counter, capturing the scene from a high angle. The store appears to be well-lit, with a blue wall on the right side and a white wall on the left.\n\nIn the foreground, there are two checkout lanes. The lane on the left has a customer wearing a blue shirt and black pants, standing in front of the counter. The lane on the right has a customer wearing a black shirt and blue jeans, also standing in front of the counter.\n\nBehind the counter, there are two employees. One employee is wearing a blue shirt and black pants, while the other is wearing a black shirt and blue jeans. Both employees are standing behind the counter, ready to assist customers.\n\nThe store appears to be well-stocked, with various items visible on the shelves and in the checkout lanes. 
The overall atmosphere of the store seems calm and orderly.", + "tokens_per_second": 37.79767195865447, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 198, + "inference_time": 28.794264554977417, + "cpu_usage": 77.8, + "cpu_core_utilization": [ + 84.9, + 72.4, + 69.3, + 84.7 + ], + "request_time": 56.224116563797 + }, + "sample_part_020.mp4": { + "response": "This image is a security camera view of a store interior, captured at 23:39:24 on May 5, 2022. The camera, labeled \"Camera 94,\" is positioned above a checkout counter where a person is standing. The store appears to be a retail establishment, possibly a clothing or accessory store, as evidenced by the various items displayed on shelves and racks throughout the scene. The image provides a bird's-eye view of the store's layout, allowing for a comprehensive understanding of the store's organization and layout.", + "tokens_per_second": 38.21252206264074, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 112, + "inference_time": 25.314521551132202, + "cpu_usage": 81.7, + "cpu_core_utilization": [ + 82.3, + 75.8, + 79.9, + 88.8 + ], + "request_time": 50.995611906051636 + }, + "sample_part_021.mp4": { + "response": "This image is a surveillance camera view of a store interior, captured on Wednesday, May 2nd, 2025, at 9:39:54 AM. The camera is positioned above the checkout counter, providing a bird's-eye view of the store's layout.\n\nThe store appears to be a retail establishment, possibly a department store or a specialty shop. The checkout counter is located on the left side of the image, with a blue wall visible on the right side. The store's interior features a tiled floor and a large mural depicting a cityscape on the right side of the image.\n\nThe store is well-stocked with merchandise, including various items displayed on shelves and racks throughout the space. 
The checkout counter is also stocked with items, likely for sale to customers.\n\nThere are several people visible in the image, including store employees and customers. The employees are wearing blue uniforms and are engaged in various tasks, such as assisting customers and managing the checkout process.\n\nThe store's layout and design suggest a focus on creating a welcoming and organized shopping environment. The large mural on the right side of the image adds a decorative element to the store's interior, while the tiled floor provides a clean and modern aesthetic.\n\nOverall, this image provides a comprehensive view of a retail store's interior, showcasing its layout, merchandise, and customer service environment.", + "tokens_per_second": 39.54199949017956, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 283, + "inference_time": 30.048882722854614, + "cpu_usage": 73.8, + "cpu_core_utilization": [ + 77.7, + 70.0, + 68.1, + 79.3 + ], + "request_time": 60.51526761054993 + }, + "sample_part_022.mp4": { + "response": "This image is a security camera view of a store interior, captured on Wednesday, May 24, 2025, at 4:41:02 PM. The camera, labeled \"Camera 94,\" is positioned in the upper right corner of the image. The store appears to be a retail space with a variety of products displayed on shelves and racks.\n\nIn the center of the image, there is a long, narrow aisle that runs from the foreground to the background. The aisle is lined with shelves stocked with various items, including what appear to be books, DVDs, and other merchandise. The products are arranged in an orderly fashion, with clear signage and labels visible on the shelves.\n\nOn the left side of the image, there is a checkout counter with a cash register and a customer standing in front of it. The counter is cluttered with various items, including papers, books, and other merchandise. 
There is also a person standing behind the counter, likely a store employee or cashier.\n\nThe right side of the image features a large blue wall with a mural depicting a cityscape. The mural is vibrant and colorful, adding a lively atmosphere to the store. There are also several security cameras mounted on the wall, likely for surveillance purposes.\n\nOverall, the image provides a detailed view of a retail store interior, showcasing its layout, merchandise, and security measures.", + "tokens_per_second": 39.54947543028791, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 287, + "inference_time": 28.72929811477661, + "cpu_usage": 79.0, + "cpu_core_utilization": [ + 77.2, + 73.8, + 80.5, + 84.6 + ], + "request_time": 58.824045181274414 + }, + "sample_part_023.mp4": { + "response": "This image is a security camera view of a store, captured on Wednesday, May 2, 2025, at 4:42:14 PM. The camera is positioned above a checkout counter, providing a bird's-eye view of the store's interior. The store appears to be well-lit, with a blue wall visible on the right side of the image.\n\nThe checkout counter is the focal point of the image, with a customer standing in front of it. The counter is cluttered with various items, including what appears to be a pile of toys or games. Behind the counter, a store employee is visible, wearing a blue shirt and black pants.\n\nThe store's interior is well-organized, with shelves stocked with merchandise on either side of the checkout counter. 
The floor is tiled, and the ceiling is high, giving the store a spacious feel.\n\nOverall, the image provides a detailed view of a typical store checkout area, with a focus on the checkout counter and the store employee.", + "tokens_per_second": 39.12818796943181, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 210, + "inference_time": 28.117918729782104, + "cpu_usage": 78.2, + "cpu_core_utilization": [ + 69.3, + 77.7, + 88.1, + 77.7 + ], + "request_time": 55.166643381118774 + }, + "sample_part_024.mp4": { + "response": "This image captures a bustling scene from a security camera inside a store, providing a bird's eye view of the activity below. The camera, labeled \"Camera 94,\" is positioned to the right of the frame, offering a clear perspective of the store's interior.\n\nThe store appears to be well-stocked with a variety of items displayed on shelves and racks. The floor is covered with tiles, adding to the store's organized appearance. The camera's position allows for a comprehensive view of the store's layout and the items on display.\n\nThe image provides a detailed look at the store's operations, showcasing the layout and the items available for purchase. The camera's perspective offers a unique view of the store's interior, allowing viewers to see the store's layout and the items on display.", + "tokens_per_second": 39.941217640686915, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 165, + "inference_time": 26.89288854598999, + "cpu_usage": 78.8, + "cpu_core_utilization": [ + 83.5, + 79.6, + 73.1, + 79.2 + ], + "request_time": 53.01892924308777 + }, + "sample_part_025.mp4": { + "response": "This image is a security camera view of a store interior, captured on Wednesday, May 2, 2025, at 4:44:11 PM. The camera is positioned above a checkout counter, providing a bird's-eye view of the store's layout.\n\nThe store appears to be a retail space, possibly a department store or a specialty shop. 
The checkout counter is located in the center of the image, with a blue wall visible to the right. The store's floor is tiled, and there are various items displayed throughout the space.\n\nThe camera view shows a person standing behind the checkout counter, likely a cashier or store employee. The store is well-lit, with overhead lighting illuminating the area.\n\nThe image provides a comprehensive view of the store's interior, allowing viewers to understand the layout and organization of the space. It's a useful tool for security purposes, as well as for customers who may need assistance with their purchases.", + "tokens_per_second": 40.173796211251464, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 197, + "inference_time": 27.58555293083191, + "cpu_usage": 77.7, + "cpu_core_utilization": [ + 82.9, + 62.6, + 68.5, + 96.5 + ], + "request_time": 54.915573596954346 + }, + "sample_part_026.mp4": { + "response": "This image is a security camera view of a store's checkout lane. The camera is positioned above the checkout counter, capturing the scene from a high angle. The store appears to be well-lit, with a blue wall on the right side and a white wall on the left. The checkout lane itself is occupied by customers, with one person visible behind the counter. The camera is labeled \"Camera 94\" in the bottom right corner.", + "tokens_per_second": 41.29902120715977, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 89, + "inference_time": 23.746012210845947, + "cpu_usage": 80.4, + "cpu_core_utilization": [ + 79.6, + 84.1, + 81.0, + 76.9 + ], + "request_time": 48.21646165847778 + }, + "sample_part_027.mp4": { + "response": "This image is a surveillance camera view, captured on Wednesday, May 2nd, 2025, at 4:45:02 PM. The scene is set in a store, where a person is standing behind a counter, presumably a cashier. The counter is cluttered with various items, including a blue tarp, a blue tarp, and a blue tarp. 
The store appears to be well-lit, with a blue wall visible in the background. The camera is positioned directly in front of the counter, providing a clear view of the cashier and the items on the counter.", + "tokens_per_second": 41.296913159264854, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 124, + "inference_time": 24.62866997718811, + "cpu_usage": 79.0, + "cpu_core_utilization": [ + 87.2, + 77.0, + 70.0, + 81.6 + ], + "request_time": 51.253766775131226 + }, + "sample_part_028.mp4": { + "response": "This image is a security camera view of a store, captured on Wednesday, May 2, 2025, at 4:47:42 PM. The camera is positioned above the checkout counter, providing a bird's-eye view of the store's interior. The store appears to be a retail space, with various items displayed on shelves and racks.\n\nIn the foreground, there is a checkout counter with a cash register and a customer. Behind the counter, there are several employees, including a cashier and a store manager. The employees are wearing blue uniforms, and the store manager is wearing a black uniform.\n\nThe store has a blue wall on the right side, and there is a large window on the left side. The window is covered with a blue curtain, and there is a sign on the window that reads \"Welcome to our store.\"\n\nThe store appears to be well-lit, and there are several shelves and racks filled with various items. 
The items on the shelves and racks include clothing, accessories, and other merchandise.\n\nOverall, the image provides a clear view of the store's interior, including the checkout counter, employees, and merchandise.", + "tokens_per_second": 39.910911105816794, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 240, + "inference_time": 28.971904516220093, + "cpu_usage": 75.8, + "cpu_core_utilization": [ + 74.4, + 74.7, + 76.0, + 77.9 + ], + "request_time": 58.34457969665527 + }, + "sample_part_029.mp4": { + "response": "This image is a security camera view of a store, captured on Wednesday, May 2, 2025, at 4:40:02 PM. The camera is positioned above the checkout counter, providing a bird's-eye view of the store's interior. The store appears to be well-lit, with a blue wall visible on the right side of the image.\n\nThe store is filled with various items for sale, including clothing and accessories. The checkout counter is located in the center of the image, with a cash register and a customer standing in front of it. Behind the counter, there are shelves stocked with merchandise.\n\nThe store's layout is organized, with aisles running parallel to the checkout counter. The floor is tiled, and the ceiling is high, giving the store a spacious feel. The image provides a clear view of the store's interior, allowing viewers to see the layout and the items for sale.", + "tokens_per_second": 38.770346069233774, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 193, + "inference_time": 26.909424543380737, + "cpu_usage": 77.2, + "cpu_core_utilization": [ + 77.0, + 73.5, + 76.6, + 81.9 + ], + "request_time": 56.532543659210205 + }, + "sample_part_030.mp4": { + "response": "This is a security camera footage captured from a store's interior. The camera is positioned above a checkout counter, providing a bird's-eye view of the store's layout. The footage shows a long aisle with various products displayed on shelves and racks. 
There are several people visible in the image, including store employees and customers. The store appears to be well-lit and organized, with clear signage and a clean environment. The footage is timestamped with the date \"05-05-2025\" and the time \"24:43:41\".", + "tokens_per_second": 38.867499480898886, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 113, + "inference_time": 24.94011878967285, + "cpu_usage": 82.0, + "cpu_core_utilization": [ + 85.2, + 68.3, + 78.1, + 96.2 + ], + "request_time": 49.87141394615173 + }, + "sample_part_031.mp4": { + "response": "This image captures a bustling scene from a security camera positioned above a store entrance. The camera, labeled \"Camera 94,\" provides a comprehensive view of the store's interior and exterior.\n\nThe store entrance is flanked by two large blue walls, creating a striking visual contrast. These walls are adorned with a mural depicting a city skyline, adding an artistic touch to the commercial space.\n\nInside the store, the floor is covered with gray tiles, providing a neutral backdrop for the various elements within the scene. A white countertop is visible, likely serving as a checkout area or customer service desk.\n\nThe store is filled with numerous people, suggesting a busy shopping day. Many of these individuals are wearing blue shirts, which could indicate they are employees or part of a promotional event.\n\nThe camera's perspective allows for a clear view of the store's layout and the activities taking place. 
It provides a unique vantage point, capturing the dynamic atmosphere of the store from above.", + "tokens_per_second": 42.60848705585198, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 201, + "inference_time": 24.630619764328003, + "cpu_usage": 38.9, + "cpu_core_utilization": [ + 21.8, + 63.8, + 20.6, + 49.7 + ], + "request_time": 51.945650815963745 + }, + "sample_part_032.mp4": { + "response": "This image is a security camera view of a store interior, captured from a high vantage point. The camera is positioned above the store, providing a wide-angle view of the area. The store appears to be well-lit, with a variety of products displayed on shelves and racks.\n\nIn the foreground, there is a checkout counter with a cash register and a customer standing in line. The customer is wearing a blue shirt and has a bag in front of them. Behind the counter, there is a store employee wearing a blue shirt and a name tag. The employee is standing behind the counter, ready to assist customers.\n\nThe store has a blue wall on the right side of the image, with a large window or display area. The window or display area is not clearly visible in the image.\n\nThe image is timestamped \"05-05-2025 Wed 02:51:51\" and is labeled \"Camera 94\". This suggests that the image was taken by a security camera located in the store.\n\nOverall, the image provides a detailed view of the store's interior, showcasing the checkout counter, the employee, and the store's layout.", + "tokens_per_second": 41.68167505655786, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 240, + "inference_time": 25.702566385269165, + "cpu_usage": 55.5, + "cpu_core_utilization": [ + 41.3, + 65.4, + 56.7, + 58.6 + ], + "request_time": 51.90046000480652 + }, + "sample_part_033.mp4": { + "response": "This image is a security camera view of a store's checkout lane. The camera is positioned above the checkout counter, capturing the scene from a high angle. 
The store appears to be well-lit, with a blue wall on the right side and a white wall on the left. The checkout lane is occupied by customers, with one person visible behind the counter. The camera is labeled \"Camera 94\" in the bottom right corner.", + "tokens_per_second": 43.18803826536313, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 88, + "inference_time": 22.062783002853394, + "cpu_usage": 39.8, + "cpu_core_utilization": [ + 22.0, + 21.8, + 95.1, + 20.1 + ], + "request_time": 44.42702794075012 + }, + "sample_part_034.mp4": { + "response": "This image is a security camera view of a store interior, captured on Wednesday, May 2, 2025, at 12:53:41. The camera is positioned above the checkout counter, providing a clear view of the store's layout and activities.\n\nThe store appears to be a retail space, possibly a department store or a specialty shop. The checkout counter is located on the left side of the image, with a cash register and various items for sale. Behind the counter, there is a display of merchandise, including what looks like clothing and possibly accessories.\n\nThe store floor is tiled, and the overall lighting suggests it's daytime. The camera's perspective allows for a comprehensive view of the store's interior, including the checkout area, merchandise displays, and the store's ceiling.\n\nThe image provides a detailed look at the store's organization and layout, giving viewers a sense of the store's environment and the activities that take place within it.", + "tokens_per_second": 42.77388431601942, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 195, + "inference_time": 24.491409063339233, + "cpu_usage": 50.7, + "cpu_core_utilization": [ + 48.9, + 68.1, + 50.9, + 35.1 + ], + "request_time": 49.37525129318237 + }, + "sample_part_035.mp4": { + "response": "This image is a security camera view of a store interior, captured from a high vantage point. 
The camera is positioned above the checkout counter, providing a wide-angle view of the store's layout. The store appears to be well-lit, with a variety of products displayed on shelves and racks throughout the space.\n\nThe camera's perspective allows for a clear view of the store's layout, including the checkout counter, shelves stocked with merchandise, and aisles leading to other sections of the store. The image also shows a blue wall with a mural depicting a cityscape, adding a decorative element to the store's interior.\n\nThe timestamp on the image indicates that it was captured on May 5, 2025, at 2:24:51 AM. The camera is labeled \"Camera 94,\" suggesting that it is part of a larger security system monitoring the store.\n\nOverall, this image provides a detailed view of the store's interior, showcasing its layout, merchandise, and decorative elements. The high-angle perspective allows for a comprehensive view of the store, making it an effective tool for security monitoring and inventory management.", + "tokens_per_second": 42.56102609895685, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 235, + "inference_time": 25.444505214691162, + "cpu_usage": 38.0, + "cpu_core_utilization": [ + 56.1, + 44.5, + 33.2, + 18.2 + ], + "request_time": 51.33426523208618 + }, + "sample_part_036.mp4": { + "response": "This image is a surveillance camera view of a store's checkout lane. The camera is positioned above the checkout counter, providing a clear view of the area where customers place their items and pay for their purchases. The store appears to be well-stocked, with a variety of products visible on the shelves and in the checkout lane. The image is captured in a horizontal format, with the camera angle slightly tilted to provide a comprehensive view of the checkout area. 
The image is labeled \"Camera 94\" in the bottom right corner, indicating the specific camera used to capture this footage.", + "tokens_per_second": 43.1751414519081, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 118, + "inference_time": 22.82624650001526, + "cpu_usage": 39.9, + "cpu_core_utilization": [ + 38.8, + 21.6, + 78.3, + 20.7 + ], + "request_time": 45.970396518707275 + }, + "sample_part_037.mp4": { + "response": "This image captures a bustling scene from a security camera positioned above a store entrance. The camera's view is angled downwards, providing a comprehensive look at the store's interior. The store is filled with a variety of items, including numerous backpacks and bags in different colors and sizes. These items are neatly arranged on shelves and racks, creating a colorful display.\n\nIn addition to the backpacks and bags, the store also features a selection of clothing items. These garments are hung on racks, adding to the store's diverse inventory. The overall atmosphere of the store appears to be one of organization and variety, with a wide range of products available for customers to browse and purchase.", + "tokens_per_second": 42.91101890756983, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 142, + "inference_time": 23.26037883758545, + "cpu_usage": 39.4, + "cpu_core_utilization": [ + 20.5, + 55.9, + 21.0, + 60.1 + ], + "request_time": 46.87315130233765 + }, + "sample_part_038.mp4": { + "response": "This image is a screenshot from a security camera, captured on Wednesday, May 2, 2025, at 2:25:57 PM. The camera is labeled \"Camera 94\" and is positioned in a store, providing a top-down view of the scene. The store appears to be a retail environment, with a long aisle running down the center. On either side of the aisle, there are various products displayed, including what looks like clothing and other merchandise. The store has a blue wall on the right side, and the floor is tiled. 
The camera's perspective suggests it is monitoring the entire length of the aisle, capturing a wide view of the store's layout and product offerings.", + "tokens_per_second": 43.039363167119106, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 144, + "inference_time": 23.33311414718628, + "cpu_usage": 39.0, + "cpu_core_utilization": [ + 32.3, + 46.4, + 47.3, + 29.8 + ], + "request_time": 47.04003715515137 + }, + "sample_part_039.mp4": { + "response": "This image is a security camera view of a store's checkout lane. The camera is positioned above the checkout counter, capturing the scene from a high angle. The store appears to be well-lit, with a blue wall on the right side and a white wall on the left. The checkout lane itself is white and has a conveyor belt system for processing transactions.\n\nThere are several people in the image, all of whom are standing in line at the checkout counter. They are wearing a variety of clothing, including blue shirts, gray shirts, and black pants. Some of the individuals are carrying bags or backpacks.\n\nThe store is stocked with various items, including books, toys, and other merchandise. The shelves and counter are filled with these items, creating a colorful and cluttered appearance.\n\nOverall, the image provides a detailed view of a typical checkout lane in a retail store, with customers waiting in line and a variety of products on display.", + "tokens_per_second": 43.06007293674169, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 197, + "inference_time": 24.494148015975952, + "cpu_usage": 38.6, + "cpu_core_utilization": [ + 28.2, + 20.0, + 86.1, + 20.0 + ], + "request_time": 49.420044898986816 + }, + "sample_part_040.mp4": { + "response": "This image is a security camera view of a store's checkout lane. The camera is positioned above the checkout counter, looking down at the customers and the items on the conveyor belt. 
The store appears to be well-lit and organized, with a blue wall on the right side and a white wall on the left side. The checkout lane is filled with customers, some of whom are wearing blue shirts. The items on the conveyor belt include various products, such as clothing and accessories. The camera is labeled \"Camera 94\" in the bottom right corner.", + "tokens_per_second": 42.90120292725611, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 115, + "inference_time": 22.5648512840271, + "cpu_usage": 39.4, + "cpu_core_utilization": [ + 23.0, + 33.0, + 19.8, + 81.8 + ], + "request_time": 45.57593035697937 + }, + "sample_part_041.mp4": { + "response": "This is a surveillance camera view of a store aisle. The camera is positioned above the store, looking down at the merchandise. The aisle is filled with various items, including clothing and accessories. There are people walking down the aisle, browsing the merchandise. The camera captures the store from a high angle, providing a wide view of the entire aisle. The image is in color and shows the store in daylight.", + "tokens_per_second": 43.534909908199374, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 82, + "inference_time": 21.86381769180298, + "cpu_usage": 40.0, + "cpu_core_utilization": [ + 26.5, + 21.5, + 90.0, + 21.9 + ], + "request_time": 44.144389390945435 + }, + "sample_part_042.mp4": { + "response": "This image is a security camera view of a store checkout lane. The camera is positioned above the checkout counter, looking down at the customers and the items on the conveyor belt. The store appears to be well-lit and organized, with a blue wall on the right side and a white wall on the left side. The checkout lane is occupied by several customers, with one person standing at the counter and others waiting in line. The items on the conveyor belt include various products, such as toys and electronics. 
The camera is labeled \"Camera 94\" in the bottom right corner.", + "tokens_per_second": 42.90742954996482, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 119, + "inference_time": 22.7154757976532, + "cpu_usage": 39.0, + "cpu_core_utilization": [ + 20.2, + 51.9, + 20.0, + 64.1 + ], + "request_time": 45.78502702713013 + }, + "sample_part_043.mp4": { + "response": "This image captures a bustling scene from a security camera positioned above a store checkout counter. The camera, labeled \"Camera 94,\" provides a bird's-eye view of the area, revealing a long checkout lane filled with customers. The store's interior is visible, showcasing a blue wall adorned with a mural of a cityscape.\n\nAt the checkout counter, a person dressed in a blue shirt is seen attending to the transaction. The counter itself is cluttered with various items, including a white box, a blue box, and a yellow box. The counter is also equipped with a cash register and a credit card machine.\n\nThe store appears to be well-stocked, with numerous items displayed on shelves and racks. The image provides a comprehensive view of the store's layout and the activities taking place within it.", + "tokens_per_second": 42.647811292769354, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 171, + "inference_time": 24.01733160018921, + "cpu_usage": 38.6, + "cpu_core_utilization": [ + 29.3, + 66.2, + 19.3, + 39.4 + ], + "request_time": 48.481837034225464 + }, + "sample_part_044.mp4": { + "response": "This image is a security camera view of a store, captured on Wednesday, May 3, 2025, at 8:03:19 AM. The camera is positioned above the checkout counter, providing a bird's-eye view of the store's interior. The store appears to be well-lit, with a blue wall on the right side and a white wall on the left.\n\nIn the foreground, a person wearing a blue shirt is standing at the checkout counter, likely a customer. 
The counter is cluttered with various items, including a yellow box, a blue box, and a white box. The person is also holding a yellow object in their hand.\n\nThe store's interior is visible in the background, with shelves stocked with various items. The shelves are arranged in a neat and orderly fashion, with items grouped together by type. The store appears to be well-organized and clean.\n\nOverall, this image provides a detailed view of a store's interior, capturing the checkout counter and the store's interior layout. The security camera view offers a unique perspective, allowing viewers to see the store's layout and organization from above.", + "tokens_per_second": 42.59159214178798, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 235, + "inference_time": 25.49647355079651, + "cpu_usage": 38.1, + "cpu_core_utilization": [ + 65.0, + 33.4, + 35.0, + 18.8 + ], + "request_time": 51.309805154800415 + }, + "sample_part_045.mp4": { + "response": "This image captures a bustling scene from a security camera positioned above a store checkout counter. The camera, labeled \"Camera 94,\" provides a bird's-eye view of the area, revealing a variety of objects and people.\n\nAt the center of the image, a checkout counter is visible, with a cashier stationed behind it. The counter is cluttered with various items, including a blue helmet, a yellow bag, and a blue t-shirt.\n\nThe background of the image features a blue wall adorned with a mural depicting a city skyline. The mural adds a touch of urban charm to the scene.\n\nIn the foreground, several people can be seen. Some are standing in line at the checkout counter, while others are walking around the store. 
The exact number of people is difficult to determine due to the angle of the image.\n\nOverall, this image provides a detailed snapshot of a typical day at a busy store, with various objects and people contributing to the lively atmosphere.", + "tokens_per_second": 42.61727911180951, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 202, + "inference_time": 24.743446350097656, + "cpu_usage": 38.2, + "cpu_core_utilization": [ + 22.8, + 83.6, + 19.8, + 26.4 + ], + "request_time": 49.84172797203064 + }, + "sample_part_046.mp4": { + "response": "This image is a security camera view of a store's checkout lane. The camera is positioned above the checkout counter, looking down at the items being checked out. The store appears to be well-lit and organized, with a variety of products on display. The checkout lane is currently empty, with no customers in sight. The image is captured from a high angle, providing a clear view of the entire checkout area.", + "tokens_per_second": 43.059706339265055, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 84, + "inference_time": 21.98611617088318, + "cpu_usage": 39.9, + "cpu_core_utilization": [ + 22.2, + 66.7, + 21.8, + 49.2 + ], + "request_time": 44.27195644378662 + }, + "sample_part_047.mp4": { + "response": "This image is a security camera view of a store interior, captured on Wednesday, May 5, 2025, at 8:00:53 AM. The camera, labeled \"Camera 94,\" is positioned in the upper right corner of the frame. The store appears to be well-lit, with a blue wall visible on the right side of the image.\n\nThe store's layout is not clearly visible, but there are several items on display, including what appears to be a display of toys or other small items. The store's floor is tiled, and there are no people visible in the image. 
The overall atmosphere of the store seems to be quiet and orderly.", + "tokens_per_second": 42.53428786851631, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 140, + "inference_time": 23.264490604400635, + "cpu_usage": 38.7, + "cpu_core_utilization": [ + 22.5, + 21.1, + 85.9, + 25.2 + ], + "request_time": 46.93555450439453 + }, + "sample_part_048.mp4": { + "response": "This image is a security camera view of a store's interior, captured on Wednesday, May 4, 2025, at 4:00 PM. The camera is positioned above the store, providing a bird's-eye view of the area. The store appears to be well-lit, with a variety of products displayed on shelves and racks. The products are arranged in an organized manner, making it easy for customers to browse and find what they are looking for. The store's layout and design suggest that it is a modern, well-maintained establishment.", + "tokens_per_second": 43.10943641242185, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 116, + "inference_time": 22.68943977355957, + "cpu_usage": 39.3, + "cpu_core_utilization": [ + 38.0, + 42.1, + 56.6, + 20.4 + ], + "request_time": 45.881267070770264 + }, + "sample_part_049.mp4": { + "response": "This image is a security camera view of a store's checkout lane. The camera is positioned above the checkout counter, capturing the scene from a high angle. The store appears to be well-lit, with a blue wall on the right side and a white wall on the left. The checkout lane itself is white, with a black conveyor belt running down the center.\n\nThere are several people in the image, all of whom are standing in line at the checkout counter. They are wearing a variety of clothing, including blue, yellow, and gray shirts. Some of the people are carrying bags or backpacks.\n\nThe store appears to be well-stocked, with shelves and displays visible in the background. 
There are also several signs and advertisements visible, including a sign for a store called \"The Store\" and a sign for a store called \"The Market.\"\n\nOverall, the image gives the impression of a busy, well-organized store with a variety of products on display.", + "tokens_per_second": 42.78977476040254, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 200, + "inference_time": 24.592493772506714, + "cpu_usage": 38.3, + "cpu_core_utilization": [ + 21.3, + 37.6, + 18.5, + 75.6 + ], + "request_time": 49.670050859451294 + }, + "sample_part_050.mp4": { + "response": "This image is a security camera view of a store interior, captured on Wednesday, May 5, 2025, at 8:00:03 AM. The camera is positioned above the checkout counter, providing a bird's-eye view of the store's layout. The store appears to be a retail space, possibly a department store or a specialty shop, with various products displayed throughout the area.\n\nThe store has a high ceiling and a spacious layout, with aisles and walkways visible in the image. The floor is tiled, and the walls are painted in neutral colors, creating a clean and modern aesthetic. The store is well-lit, with overhead lighting illuminating the products and the overall space.\n\nThe image shows a variety of products, including clothing, accessories, and possibly electronics or other consumer goods. The products are arranged on shelves, racks, and displays throughout the store, creating a visually appealing and organized environment.\n\nThe store's layout is designed to facilitate easy navigation and shopping, with clear pathways for customers to move through the space. The checkout counter is located at the front of the store, with a clear view of the products and the store's interior.\n\nOverall, the image provides a comprehensive view of the store's interior, showcasing its layout, product displays, and overall design. 
The security camera view offers a unique perspective on the store's operations, providing valuable information for security and management purposes.", + "tokens_per_second": 42.601318461085675, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 295, + "inference_time": 26.902544021606445, + "cpu_usage": 37.2, + "cpu_core_utilization": [ + 64.1, + 32.5, + 25.5, + 26.8 + ], + "request_time": 54.1536865234375 + }, + "sample_part_051.mp4": { + "response": "This image is a security camera view of a store interior, captured on Wednesday, May 4, 2025, at 10:10 AM. The camera is positioned above the checkout area, providing a bird's-eye view of the store's layout. The store appears to be well-lit, with a blue wall on the right side and a white wall on the left.\n\nIn the center of the image, there is a long checkout counter with several customers standing in line. The customers are dressed in various colors, including blue, gray, and black. They are carrying different types of bags, such as backpacks, handbags, and suitcases.\n\nThe store's interior is well-organized, with shelves and displays stocked with various items. The shelves are filled with products, including books, DVDs, and other merchandise. The displays are arranged in an orderly fashion, making it easy for customers to browse and find what they are looking for.\n\nOverall, the image provides a clear and detailed view of the store's interior, showcasing its layout, organization, and customer service.", + "tokens_per_second": 42.85916018839002, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 228, + "inference_time": 25.262394428253174, + "cpu_usage": 38.3, + "cpu_core_utilization": [ + 67.3, + 20.3, + 45.2, + 20.3 + ], + "request_time": 51.0171115398407 + }, + "sample_part_052.mp4": { + "response": "This image is a security camera view of a store's checkout lane. The camera is positioned above the checkout counter, capturing the scene from a high angle. 
The store appears to be well-lit, with a blue wall on the right side and a white wall on the left. The checkout lane is occupied by customers, with one person standing at the counter and another person standing behind it. The store is well-stocked, with various products visible on the shelves and in the checkout lane. The image is clear and detailed, providing a comprehensive view of the store's checkout lane.", + "tokens_per_second": 42.59541770032984, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 118, + "inference_time": 22.760553121566772, + "cpu_usage": 40.7, + "cpu_core_utilization": [ + 34.6, + 31.3, + 60.6, + 36.2 + ], + "request_time": 45.888914346694946 + }, + "sample_part_053.mp4": { + "response": "This image is a surveillance camera view of a store interior, captured on Wednesday, May 12, 2023, at 4:12:01 PM. The camera, labeled \"Camera 94,\" is positioned in the upper right corner of the frame. The store appears to be well-lit, with a blue wall visible on the right side of the image.\n\nIn the center of the frame, there is a long, narrow aisle filled with various items. The aisle is lined with products that are difficult to identify due to the distance and angle of the camera. However, it appears that the products are arranged in rows, with some items placed on top of others.\n\nOn the left side of the aisle, there is a white counter with a cash register and a person standing behind it. The cashier is wearing a blue shirt and black pants, and is facing towards the right side of the image.\n\nThe aisle itself is crowded with products, and it is unclear how many items are on display. The products appear to be arranged in rows, with some items placed on top of others. The aisle is well-lit, and the products are clearly visible.\n\nOverall, the image provides a detailed view of the store interior, showcasing the layout and arrangement of the products. 
The camera's position and angle provide a clear view of the aisle and the products on display.", + "tokens_per_second": 42.52309268195912, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 282, + "inference_time": 26.741292715072632, + "cpu_usage": 37.4, + "cpu_core_utilization": [ + 18.6, + 68.7, + 17.1, + 45.2 + ], + "request_time": 53.71666145324707 + }, + "sample_part_054.mp4": { + "response": "This image shows a security camera view of a store aisle. The camera is positioned above the aisle, capturing a downward view of the merchandise. The aisle is filled with various items, including what appear to be toys, games, and possibly some clothing. The store has a blue wall on the right side of the aisle, and the floor is tiled. The camera is labeled \"Camera 94\" in the bottom right corner of the image.", + "tokens_per_second": 43.295534779120004, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 90, + "inference_time": 22.070221185684204, + "cpu_usage": 39.6, + "cpu_core_utilization": [ + 66.3, + 21.5, + 48.7, + 21.8 + ], + "request_time": 44.624393463134766 + }, + "sample_part_055.mp4": { + "response": "This image is a security camera footage captured from a store's interior. The camera is positioned above a checkout counter, providing a bird's-eye view of the store's layout. The store appears to be well-lit, with a variety of products displayed on shelves and racks throughout the space.\n\nIn the foreground, a person wearing a blue shirt is standing behind the checkout counter, likely a cashier or store employee. The counter itself is equipped with a cash register and various items for sale, including what appear to be toys or collectibles.\n\nThe store's interior features a blue wall on the right side of the image, adorned with a mural depicting a cityscape. 
The floor is tiled, and the overall atmosphere suggests a clean and organized retail environment.\n\nThe timestamp on the footage indicates that it was captured on May 5, 2025, at 4:14:11 PM. This information, along with the security camera footage, provides a snapshot of the store's operations at a specific moment in time.", + "tokens_per_second": 43.15558723881245, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 211, + "inference_time": 24.835733652114868, + "cpu_usage": 38.2, + "cpu_core_utilization": [ + 33.6, + 44.5, + 22.5, + 52.2 + ], + "request_time": 50.15839123725891 + }, + "sample_part_056.mp4": { + "response": "This image is a screenshot from a security camera, captured on Wednesday, May 14, 2025, at 4:14:15 PM. The scene is set in a store, where a man is standing behind a counter, wearing a blue shirt and a black hat. He appears to be engaged in some activity at the counter, possibly assisting a customer or handling a transaction.\n\nThe store is well-lit and organized, with various items displayed on shelves and racks. The shelves are stocked with a variety of products, including what appears to be toys, games, and possibly electronics. The racks are filled with neatly arranged items, suggesting a well-maintained and orderly store environment.\n\nThe store's interior features a blue wall on the right side, which adds a pop of color to the scene. The floor is tiled, and the overall atmosphere of the store seems clean and inviting.\n\nThe security camera's perspective provides a clear view of the store's layout and the activities taking place. 
The man's position behind the counter indicates that he is likely an employee of the store, responsible for assisting customers and managing transactions.\n\nOverall, this image captures a moment in the daily operations of a retail store, highlighting the interactions between employees and customers, and the store's organized and inviting atmosphere.", + "tokens_per_second": 41.82676160274437, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 270, + "inference_time": 26.506124019622803, + "cpu_usage": 53.9, + "cpu_core_utilization": [ + 43.5, + 65.8, + 40.3, + 66.1 + ], + "request_time": 53.235275983810425 + }, + "sample_part_057.mp4": { + "response": "This image is a security camera view of a store, captured on Wednesday, May 16, 2025, at 4:16:27 PM. The camera is positioned above the checkout counter, providing a bird's-eye view of the store's interior. The store appears to be well-lit and organized, with various products displayed on shelves and racks.\n\nIn the foreground, there is a checkout counter with a cash register and a few customers standing in line. One customer is wearing a blue shirt and appears to be interacting with a store employee. The employee is standing behind the counter, assisting the customer with their transaction.\n\nThe store has a blue wall on the right side of the image, which features a mural of a city skyline. 
The mural adds a decorative element to the store's interior and creates a visually appealing backdrop for the checkout area.\n\nOverall, the image provides a clear and detailed view of the store's checkout area, showcasing the store's layout, products, and customer service.", + "tokens_per_second": 42.26603746275003, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 209, + "inference_time": 25.031978368759155, + "cpu_usage": 40.2, + "cpu_core_utilization": [ + 35.2, + 34.3, + 55.8, + 35.3 + ], + "request_time": 50.42637252807617 + }, + "sample_part_058.mp4": { + "response": "This image is a screenshot from a security camera, captured on Wednesday, May 5th, 2025, at 4:12:17 PM. The camera is labeled \"Camera 94\" and is positioned in a store, providing a top-down view of the scene. The store appears to be a retail environment, with a variety of items displayed on shelves and racks.\n\nIn the center of the image, there is a person wearing a blue shirt and black pants, who seems to be a store employee. They are standing in front of a large display that features a blue background with a cityscape. The display is quite colorful, with various items in shades of blue, yellow, and red.\n\nThe store itself has a tiled floor, and the overall atmosphere appears to be well-lit and organized. 
The camera's perspective gives a unique view of the store's layout and the items on display, providing a comprehensive look at the retail environment.", + "tokens_per_second": 43.234796973891356, + "peak_gpu_memory_mb": 9378.375, + "num_generated_tokens": 197, + "inference_time": 24.536916732788086, + "cpu_usage": 38.5, + "cpu_core_utilization": [ + 42.4, + 19.0, + 72.2, + 20.2 + ], + "request_time": 49.45543932914734 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/minicpm/20250820_233630/Clips_60s.json b/API_Transformers/output_0821/minicpm/20250820_233630/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..6b3aeaa4682f03ca09d94cecc9eef013718d66b6 --- /dev/null +++ b/API_Transformers/output_0821/minicpm/20250820_233630/Clips_60s.json @@ -0,0 +1,362 @@ +{ + "sample_part_001.mp4": { + "response": "The video captures a sequence of events inside a convenience store, viewed from an overhead security camera. Initially, the scene is relatively calm with two employees and several customers present. One employee stands behind the counter attending to tasks or interacting with the customer line, while another moves around the store possibly assisting customers or organizing items on the shelves.\n\nAs time progresses, more customers join the queue at the counter, indicating increased activity in the store. The interaction between the staff and customers becomes evident as one employee appears to be handling transactions or providing assistance to the growing number of people waiting their turn. 
Throughout the footage, there are no significant changes in the environment or introduction of new objects; the focus remains on the steady flow of customer interactions and the ongoing responsibilities of the store employees managing the situation efficiently.", + "tokens_per_second": 8.779788779248046, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 162, + "inference_time": 41.19897103309631, + "cpu_usage": 20.4, + "cpu_core_utilization": [ + 14.8, + 25.6, + 19.6, + 21.6 + ], + "request_time": 41.30454730987549 + }, + "sample_part_002.mp4": { + "response": "The video takes place in a convenience store and captures the activities of several employees throughout the day. Initially, an employee is seen arranging items on the counter while another stands behind the register attending to tasks such as handling cash or processing transactions. As time progresses, additional staff members join the scene, all engaged in various duties related to managing the store's operations. The interactions among the workers suggest they are collaborating efficiently to maintain the store's daily functions, with some possibly assisting customers at the register and others organizing products on the shelves. The presence of multiple individuals working together highlights the teamwork required to keep the business running smoothly during busy periods.", + "tokens_per_second": 8.238054802466655, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 132, + "inference_time": 33.705246448516846, + "cpu_usage": 35.8, + "cpu_core_utilization": [ + 31.6, + 59.3, + 28.2, + 24.1 + ], + "request_time": 33.71944332122803 + }, + "sample_part_003.mp4": { + "response": "The video takes place in a convenience store and captures the activities of several individuals. It begins with two people standing behind the counter, one wearing a blue shirt and the other dressed in white. 
A long line of customers extends from the entrance to the back of the store, all waiting their turn to make purchases. The person in the blue shirt appears to be handling transactions or assisting the customer at the front of the queue.\n\nAs time progresses, more customers join the end of the line while others move towards the front. The scene remains relatively static for some time, focusing on the steady flow of customers being attended to by the staff member in the blue shirt. Eventually, another individual wearing a yellow shirt joins the cashier area, possibly taking over tasks related to serving customers.\n\nThroughout the sequence, there are minimal changes in the positions of the customers and the actions of the employees. The primary activity revolves around the interaction between the cashier and the customers lining up outside the register. There is no significant movement or action apart from these interactions within the confines of the store.", + "tokens_per_second": 9.842206491452542, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 222, + "inference_time": 40.17866539955139, + "cpu_usage": 34.2, + "cpu_core_utilization": [ + 42.5, + 14.8, + 64.8, + 14.6 + ], + "request_time": 40.19294214248657 + }, + "sample_part_004.mp4": { + "response": "The video takes place inside a convenience store and captures the interactions between employees and customers. It begins with an employee assisting a customer at the counter, who is wearing a blue cap and appears to be making a purchase or inquiry. The scene remains consistent as another individual enters the frame from the left side, walking past the shelves towards the right. 
Throughout the sequence of images, there are no significant changes in the actions taking place; the focus stays on the ongoing interaction between the employee and the customer.", + "tokens_per_second": 7.223886604016651, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 100, + "inference_time": 31.570722579956055, + "cpu_usage": 36.1, + "cpu_core_utilization": [ + 35.9, + 19.2, + 72.5, + 16.5 + ], + "request_time": 31.584439754486084 + }, + "sample_part_005.mp4": { + "response": "The video takes place in a convenience store and captures the activities of several individuals throughout its duration. Initially, we see an individual wearing a blue shirt interacting with another person who is seated at a counter on the left side of the frame. This interaction appears to be focused around transactions or inquiries related to items available for purchase behind the counter.\n\nAs time progresses, additional people enter the scene from the left edge of the frames. These newcomers add to the dynamic nature of the environment but do not alter the primary activity centered around the counter area. The interactions between the standing individuals and those already present continue without significant changes in their actions or positions relative to each other.\n\nThroughout the sequence, there are no notable alterations in the background setting or any major shifts in the arrangement of objects within the store. 
The shelves stocked with various products remain unchanged as well, maintaining consistency in the visual context provided by the surveillance camera capturing these events.", + "tokens_per_second": 9.414676082668533, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 193, + "inference_time": 38.22864317893982, + "cpu_usage": 34.6, + "cpu_core_utilization": [ + 34.4, + 30.3, + 14.1, + 59.5 + ], + "request_time": 38.24436354637146 + }, + "sample_part_006.mp4": { + "response": "The video takes place in a convenience store and captures the interactions of several individuals within this space. It begins with a person seated at the counter, possibly engaging in an activity such as using a device or handling items on the counter. Another individual enters the scene from the left side of the frame, walking towards the center where numerous people are gathered. These individuals appear to be standing close together, potentially waiting for assistance or interacting with each other.\n\nThroughout the sequence, there is minimal movement apart from these two main actions: the entrance of the second person and slight adjustments by those already present. The setting remains consistent, focusing on the interaction between the seated individual at the counter and the group of people nearby. There are no significant changes in the environment or introduction of new elements that alter the primary focus of the scenes depicted in the video.", + "tokens_per_second": 9.018116223758131, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 170, + "inference_time": 36.60003876686096, + "cpu_usage": 35.1, + "cpu_core_utilization": [ + 47.3, + 19.3, + 54.1, + 19.6 + ], + "request_time": 36.614866971969604 + }, + "sample_part_007.mp4": { + "response": "The video takes place in a convenience store and captures the activities of several individuals. 
It begins with a person wearing a blue shirt standing behind the counter, seemingly engaged in some task related to managing transactions or stock. A long line of customers is visible extending from the entrance towards the back of the store, indicating high customer traffic during this time. As the video progresses, another individual appears near the front of the store, possibly assisting the first person at the counter or interacting with someone further inside the store. The scene remains consistent throughout the sequence, focusing on the interaction between the staff member and the waiting customers without significant changes in actions or environment.", + "tokens_per_second": 8.128884269772154, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 130, + "inference_time": 33.74411225318909, + "cpu_usage": 35.5, + "cpu_core_utilization": [ + 29.4, + 48.5, + 49.0, + 15.1 + ], + "request_time": 33.758394956588745 + }, + "sample_part_008.mp4": { + "response": "The video takes place in a convenience store and captures the activities of several individuals over a period. Initially, two people are seen interacting at the counter; one appears to be an employee wearing a blue shirt, while the other is possibly a customer dressed in yellow. The scene remains largely unchanged for some time as these two continue their interaction. Shortly afterward, another individual enters the frame from the left side, carrying orange bags. This new person seems to approach the counter, potentially indicating that they have made a purchase or need assistance. 
Throughout the sequence, there are no significant changes in the background or additional actions occurring within the frames provided.", + "tokens_per_second": 8.104748907769563, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 129, + "inference_time": 33.688347578048706, + "cpu_usage": 35.7, + "cpu_core_utilization": [ + 22.5, + 24.9, + 45.2, + 50.3 + ], + "request_time": 33.70404505729675 + }, + "sample_part_009.mp4": { + "response": "The video takes place inside a convenience store and captures the interactions between employees and customers throughout several timestamps. Initially, an employee in blue attire is seen interacting with another person near the entrance of the store while numerous items are displayed on shelves behind them. As time progresses, this interaction continues with slight movements from both parties. The scene remains relatively static until a new individual wearing black enters the frame and engages with the same customer at the counter. This exchange appears to be more focused as they handle what seems to be a transaction or discussion regarding purchases. Throughout these sequences, there are no significant changes in the environment or additional activities observed apart from the aforementioned interactions.", + "tokens_per_second": 8.260490846413186, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 135, + "inference_time": 34.09119725227356, + "cpu_usage": 35.2, + "cpu_core_utilization": [ + 44.4, + 20.5, + 41.4, + 34.6 + ], + "request_time": 34.10764408111572 + }, + "sample_part_010.mp4": { + "response": "The video takes place in a convenience store and captures the activities of several individuals within this setting. It begins with a person wearing a blue shirt interacting at the counter, possibly making a purchase or engaging in some transaction. Another individual, dressed in yellow, is also seen near the counter, likely assisting or accompanying the first person. 
Throughout the sequence, there are no significant changes in the actions or positions of these individuals; they remain focused on their tasks at the counter. The environment remains consistent, showing typical items found in a convenience store such as various products displayed on shelves. The primary activity revolves around the interaction between the two people at the counter, suggesting a routine process of buying or selling goods within the store.", + "tokens_per_second": 8.474433175082025, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 144, + "inference_time": 34.754554271698, + "cpu_usage": 35.3, + "cpu_core_utilization": [ + 40.2, + 15.6, + 70.7, + 14.8 + ], + "request_time": 34.76887345314026 + }, + "sample_part_011.mp4": { + "response": "The video takes place inside a convenience store and captures the activities of two employees during their workday. Initially, one employee is seen handling transactions at the checkout counter while another works behind the counter near the shelves stocked with various items. As time progresses, both employees continue to perform their respective tasks without significant changes in activity or interaction between them. The timestamp indicates that this sequence occurs on May 5, 2025, Wednesday, around midday (2:30 PM).", + "tokens_per_second": 7.281456041328239, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 102, + "inference_time": 31.77820658683777, + "cpu_usage": 35.9, + "cpu_core_utilization": [ + 41.3, + 59.3, + 27.5, + 15.5 + ], + "request_time": 31.79433846473694 + }, + "sample_part_012.mp4": { + "response": "The video takes place in a convenience store and captures the activities of two employees throughout an entire workday. It begins with one employee, dressed in blue, attending to tasks at the counter while another person, wearing yellow, interacts with them from behind the counter. 
As time progresses, the scene remains largely unchanged for several hours, showing minimal movement or action from both individuals. The focus is primarily on their interactions and occasional adjustments they make to items on the counter. Towards the end of the observed period, there are slight changes such as the appearance of additional people entering the frame, but these do not significantly alter the overall calm and steady atmosphere within the store.", + "tokens_per_second": 8.229193657182282, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 134, + "inference_time": 34.07243013381958, + "cpu_usage": 35.4, + "cpu_core_utilization": [ + 17.3, + 58.4, + 14.4, + 51.7 + ], + "request_time": 34.08636164665222 + }, + "sample_part_013.mp4": { + "response": "The video takes place in a convenience store and captures the activities of several individuals. Initially, two people are present: one seated behind the counter wearing a blue shirt, possibly working or resting, and another standing nearby dressed in white. The scene is static with no noticeable actions taking place. As time progresses, a third individual appears, also sitting at the counter engaged in an activity involving their hands near the cash register area. Throughout the sequence, there are minimal changes in the positions and actions of these three individuals. The background remains consistent, showing shelves stocked with various items typical of a convenience store setting.", + "tokens_per_second": 7.933309315527597, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 123, + "inference_time": 33.3245952129364, + "cpu_usage": 35.5, + "cpu_core_utilization": [ + 59.0, + 27.1, + 15.4, + 40.4 + ], + "request_time": 33.339306116104126 + }, + "sample_part_014.mp4": { + "response": "The video takes place inside a convenience store and captures the activities of two employees throughout an entire shift. 
It begins with one employee standing behind the counter, seemingly engaged in some task or waiting for customers. The other employee is seated on the floor near the shelves. As time progresses, the scene remains largely unchanged, indicating that no significant events are occurring at this point.\n\nAs the hours pass, both employees remain in their respective positions: one behind the counter and the other sitting on the floor by the shelves. There are slight changes in their actions; occasionally, the seated employee appears to be looking up or reaching towards the shelves. However, these movements do not signify any major activity within the store.\n\nThroughout the duration of the footage, there are no visible customers entering or exiting the store, nor are there noticeable interactions between the employees or any items being handled actively. The environment stays static, suggesting a calm period without notable occurrences affecting the routine operations of the convenience store during this timeframe.", + "tokens_per_second": 9.6318698479325, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 208, + "inference_time": 39.460994482040405, + "cpu_usage": 34.5, + "cpu_core_utilization": [ + 46.5, + 21.2, + 28.6, + 41.7 + ], + "request_time": 39.47555422782898 + }, + "sample_part_015.mp4": { + "response": "The video takes place in a convenience store and captures the interactions between employees and customers. Initially, an employee wearing a blue shirt is seen attending to a customer at the counter. The background displays shelves stocked with various products. As time progresses, another person enters the frame from the left side, approaching the same area where the first interaction occurred. 
Throughout the sequence of images, there are no significant changes in the environment or actions; both individuals remain focused on their respective tasks within the confines of the convenience store setting.", + "tokens_per_second": 7.41501152980709, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 106, + "inference_time": 32.040939807891846, + "cpu_usage": 35.9, + "cpu_core_utilization": [ + 44.7, + 31.3, + 28.4, + 39.1 + ], + "request_time": 32.05526804924011 + }, + "sample_part_016.mp4": { + "response": "The video takes place inside a convenience store and captures the activities of several individuals. Initially, many people are seen standing in line at the counter, likely waiting to make purchases or receive assistance from the staff behind it. The timestamp indicates that this scene occurs on May 5, 2023, starting around 2:34 PM.\n\nAs time progresses, one person wearing a blue shirt moves closer to the front of the queue, possibly indicating they have been served or are about to be attended to by the cashier. Throughout the sequence, there is minimal movement among the customers except for these slight adjustments in position as they wait their turn.\n\nTowards the end of the observed period, an individual dressed in black appears near the entrance of the store. This new presence does not seem to affect the orderly nature of the queue but adds another layer to the activity within the store environment.", + "tokens_per_second": 9.31005578568323, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 187, + "inference_time": 37.84053945541382, + "cpu_usage": 34.4, + "cpu_core_utilization": [ + 26.5, + 43.5, + 51.5, + 16.2 + ], + "request_time": 37.85484457015991 + }, + "sample_part_017.mp4": { + "response": "The video takes place inside a convenience store and captures the activities of two employees during their workday. 
Initially, one employee is seen organizing or handling items on the counter while another person enters the scene from the left side. Shortly after, both employees are present at the counter; one continues with tasks such as counting money or preparing transactions, while the other remains engaged in similar duties. Throughout the sequence, there are no significant changes in the environment or introduction of new objects. The focus stays on the employees' interactions and responsibilities within the store setting.", + "tokens_per_second": 7.603712717977961, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 112, + "inference_time": 32.463836669921875, + "cpu_usage": 36.2, + "cpu_core_utilization": [ + 34.5, + 20.6, + 74.3, + 15.2 + ], + "request_time": 32.47782611846924 + }, + "sample_part_018.mp4": { + "response": "The video takes place inside a convenience store and captures the activities of several individuals. Initially, there is a person wearing a blue shirt standing behind the counter, seemingly engaged in some task related to managing transactions or stock. This individual remains at the counter throughout the sequence.\n\nAs time progresses, another person dressed in black enters the scene from the left side of the frame. This new arrival appears to be interacting with the items on the shelves adjacent to the counter, possibly organizing or selecting products for purchase. The presence of this second person introduces slight changes in their positioning relative to each other and the surrounding environment over the course of the video.\n\nThroughout the entire duration observed, no significant interactions between the two main subjects are noted; they remain relatively stationary within their respective areas - one behind the counter and the other near the shelves. The background consistently shows typical elements of a convenience store such as various packaged goods and signage. 
There are no notable alterations to these surroundings during the sequence captured by the camera.", + "tokens_per_second": 9.605931622305029, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 206, + "inference_time": 41.81000638008118, + "cpu_usage": 53.0, + "cpu_core_utilization": [ + 47.2, + 51.0, + 44.8, + 69.2 + ], + "request_time": 41.82394623756409 + }, + "sample_part_019.mp4": { + "response": "The video takes place in a convenience store and captures the activities of several employees during their workday. It begins with two employees standing behind the counter, one handling transactions and the other organizing items on the shelves. As time progresses, another employee joins them at the counter. The scene remains consistent throughout, focusing on the interactions between the employees as they manage tasks such as serving customers and restocking products.", + "tokens_per_second": 6.553943613195835, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 83, + "inference_time": 30.420403480529785, + "cpu_usage": 36.3, + "cpu_core_utilization": [ + 28.7, + 56.6, + 44.1, + 15.8 + ], + "request_time": 30.43483829498291 + }, + "sample_part_020.mp4": { + "response": "The video takes place in a convenience store and captures the activities of several individuals within this space. It begins with one person standing near the entrance, while another individual is seated at the counter. As time progresses, more people join the scene, filling up the area around the counter. The interactions among these individuals suggest they are engaged in various tasks related to shopping or transactions. Throughout the sequence, there are no significant changes in the environment or introduction of new objects. 
The focus remains on the movements and actions of the people present, highlighting their engagement in typical retail activities such as browsing, waiting, and possibly making purchases.", + "tokens_per_second": 7.993319182461253, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 125, + "inference_time": 33.458741664886475, + "cpu_usage": 35.9, + "cpu_core_utilization": [ + 55.1, + 38.2, + 24.9, + 25.4 + ], + "request_time": 33.472397804260254 + }, + "sample_part_021.mp4": { + "response": "The video takes place inside a convenience store and captures the interactions between customers and staff throughout a period of time. Initially, there is minimal activity with only one customer present near the counter. As time progresses, more individuals enter the scene, forming a line at the checkout area. The employees behind the counter are actively engaged in assisting these customers by handling transactions and managing the queue. Throughout the sequence, the number of people waiting to make purchases increases gradually until it reaches its peak. Despite this growing crowd, the employees continue their tasks efficiently, ensuring that each transaction is processed smoothly. Towards the end of the observed period, the volume of customers begins to decrease slightly as some complete their purchases and leave the store.", + "tokens_per_second": 8.475114910409703, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 144, + "inference_time": 34.68561768531799, + "cpu_usage": 35.2, + "cpu_core_utilization": [ + 23.3, + 27.9, + 41.5, + 48.1 + ], + "request_time": 34.69935965538025 + }, + "sample_part_022.mp4": { + "response": "The video captures a scene inside a convenience store on May 5, 2025. It shows multiple individuals gathered around the counter area, engaging in various activities such as interacting with each other and handling items at the register. One person is consistently seen sitting behind the counter, presumably working there. 
The timestamp indicates that the events unfold over a period of time, starting from 2:49 PM and progressing to 2:51 PM. Throughout the sequence, the crowd remains relatively stable in size and position, suggesting a continuous flow of interactions without significant changes or departures within the group.", + "tokens_per_second": 7.968927639160867, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 124, + "inference_time": 33.2837700843811, + "cpu_usage": 35.7, + "cpu_core_utilization": [ + 63.9, + 25.4, + 38.6, + 14.6 + ], + "request_time": 33.29735851287842 + }, + "sample_part_023.mp4": { + "response": "The video captures a sequence of events inside a convenience store on May 5, 2025. Initially, the scene is relatively calm with only one person visible near the counter. As time progresses, another individual enters the frame and stands beside the first person at the counter. Shortly afterward, a third person appears in the background, adding to the interaction at the counter area. The timestamp indicates that these interactions are taking place between 10:48 AM and 12:00 PM.", + "tokens_per_second": 7.454720470818721, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 107, + "inference_time": 32.14301896095276, + "cpu_usage": 35.9, + "cpu_core_utilization": [ + 54.3, + 22.4, + 50.0, + 17.0 + ], + "request_time": 32.157259464263916 + }, + "sample_part_024.mp4": { + "response": "The video takes place in a convenience store and captures the activities of several employees during their workday. It begins with an employee handling transactions at the checkout counter, while another employee is seen organizing items on the shelves nearby. As time progresses, additional employees join the scene, contributing to various tasks within the store. One employee moves towards the back of the store, possibly attending to inventory or restocking duties. 
The sequence continues with more employees arriving and engaging in different aspects of managing the store's operations. Throughout the video, the timestamp indicates that these events occur over a span of about 40 minutes on May 5, 2025.", + "tokens_per_second": 8.25678903581856, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 135, + "inference_time": 34.09045100212097, + "cpu_usage": 35.5, + "cpu_core_utilization": [ + 51.5, + 45.5, + 16.6, + 28.4 + ], + "request_time": 34.104634284973145 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/minicpm/20250820_235407/Clips_60s.json b/API_Transformers/output_0821/minicpm/20250820_235407/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..bf60a926dbed33b27eb6e4fd290cb127078d4906 --- /dev/null +++ b/API_Transformers/output_0821/minicpm/20250820_235407/Clips_60s.json @@ -0,0 +1,842 @@ +{ + "sample_part_001.mp4": { + "response": "A group of people are seated in a convenience store. One woman is sitting on the left side, looking at something she has placed down. The man behind the counter looks up and then back down as he talks to someone off-screen. He picks up an item from his shelf and hands it over. A person wearing blue enters through the doorway and sits next to the first lady.", + "tokens_per_second": 6.4951065299349615, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 79, + "inference_time": 29.853224754333496, + "cpu_usage": 6.8, + "cpu_core_utilization": [ + 6.3, + 7.3, + 10.0, + 3.6 + ], + "request_time": 29.867039680480957 + }, + "sample_part_002.mp4": { + "response": "A woman is purchasing an item from a convenience store. She picks up the product and pays for it at the register. 
The cashier processes her payment, rings up the sale, and hands over the items to the customer.", + "tokens_per_second": 4.620365146287069, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 46, + "inference_time": 27.5525860786438, + "cpu_usage": 37.7, + "cpu_core_utilization": [ + 23.5, + 33.0, + 45.6, + 48.7 + ], + "request_time": 27.566730737686157 + }, + "sample_part_003.mp4": { + "response": "In this footage, we see a crowded convenience store with numerous individuals standing in line. The scene is captured from an overhead perspective showing the layout of the store and its patrons. A man wearing blue clothing can be seen interacting with another individual at the counter. Throughout the video, there are no significant changes to the situation; people continue waiting patiently while engaging in various activities such as looking at their phones or conversing with each other.", + "tokens_per_second": 6.8004162288880075, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 88, + "inference_time": 30.570102214813232, + "cpu_usage": 36.6, + "cpu_core_utilization": [ + 23.5, + 34.8, + 48.2, + 39.9 + ], + "request_time": 30.584243059158325 + }, + "sample_part_004.mp4": { + "response": "A man in a blue shirt and hat is working behind the counter of a convenience store. He is handling some items on the counter, possibly preparing or organizing them for sale. The time stamp indicates that this activity is taking place at 02:23 pm on May 5, 2025.", + "tokens_per_second": 5.72115634822065, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 65, + "inference_time": 29.02957820892334, + "cpu_usage": 36.6, + "cpu_core_utilization": [ + 18.2, + 59.9, + 52.6, + 16.0 + ], + "request_time": 29.044009685516357 + }, + "sample_part_005.mp4": { + "response": "A man wearing a blue hat is working at the counter of a convenience store. He takes an order from a customer and then searches for something on his computer screen. 
The shelves behind him are stocked with various items, including boxes of drinks.", + "tokens_per_second": 4.8389257523545774, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 50, + "inference_time": 28.024327278137207, + "cpu_usage": 37.2, + "cpu_core_utilization": [ + 18.8, + 20.2, + 51.9, + 57.8 + ], + "request_time": 28.038628816604614 + }, + "sample_part_006.mp4": { + "response": "A man in a blue shirt is standing at the counter of a convenience store, busy with some work on his computer. A long line has formed behind him as customers wait to be served. The camera captures an overhead view from above and also shows another angle through the window beside the cashier area.", + "tokens_per_second": 5.491475730408376, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 61, + "inference_time": 28.852471351623535, + "cpu_usage": 36.8, + "cpu_core_utilization": [ + 19.5, + 63.2, + 31.2, + 33.4 + ], + "request_time": 28.867112398147583 + }, + "sample_part_007.mp4": { + "response": "A woman in a blue shirt is working at the counter of a convenience store. She takes an order from a customer and then processes it, possibly scanning items or handling payment. The scene captures a typical transaction taking place within a retail environment, focusing on the interaction between the employee and the customer.", + "tokens_per_second": 5.435457465325606, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 60, + "inference_time": 28.745087385177612, + "cpu_usage": 37.6, + "cpu_core_utilization": [ + 28.1, + 30.6, + 71.4, + 20.3 + ], + "request_time": 28.75893545150757 + }, + "sample_part_008.mp4": { + "response": "A person wearing a blue shirt enters the frame, walks forward to pick up an item from behind the counter and then exits. Another individual in yellow follows closely after them. 
The two people walk out of view towards the left side of the screen.", + "tokens_per_second": 4.895421462414148, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 51, + "inference_time": 28.192399501800537, + "cpu_usage": 37.4, + "cpu_core_utilization": [ + 27.1, + 35.4, + 25.4, + 61.8 + ], + "request_time": 28.207008123397827 + }, + "sample_part_009.mp4": { + "response": "A man in a blue shirt is standing at the counter of a convenience store. He picks up an item from behind him and hands it to someone who walks past him, exiting on frame left. The person who entered returns with another customer wearing a black jacket. They stand next to each other near the cashier for a moment before both walk out together towards frame right.", + "tokens_per_second": 6.246345549948331, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 76, + "inference_time": 29.955488204956055, + "cpu_usage": 36.8, + "cpu_core_utilization": [ + 36.0, + 58.0, + 37.8, + 15.2 + ], + "request_time": 29.96996259689331 + }, + "sample_part_010.mp4": { + "response": "The video captures a sequence inside a convenience store, focusing on the counter area where transactions are taking place. Initially, there is no visible customer interaction with the cashier. As time progresses, an individual approaches the counter and engages in some activity that appears to be related to making a purchase or inquiry. The person at the counter interacts with this newcomer, possibly handling items or providing assistance. Throughout the clip, various products can be seen displayed behind the counter, indicating typical merchandise found in such stores. 
There are no significant movements or changes in the environment apart from these interactions.", + "tokens_per_second": 7.823522467010842, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 119, + "inference_time": 32.94475436210632, + "cpu_usage": 35.7, + "cpu_core_utilization": [ + 52.5, + 23.9, + 22.6, + 43.7 + ], + "request_time": 32.959415435791016 + }, + "sample_part_011.mp4": { + "response": "A man wearing a blue shirt is working at the counter of a convenience store. He is taking an order from a customer who sits on a chair in front of him. The customer wears a white hat and has black hair. On the right side of the frame, there are many hats stacked together.", + "tokens_per_second": 5.483772695220153, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 61, + "inference_time": 28.899153232574463, + "cpu_usage": 37.2, + "cpu_core_utilization": [ + 18.5, + 41.5, + 21.7, + 67.3 + ], + "request_time": 28.91345238685608 + }, + "sample_part_012.mp4": { + "response": "The footage captures a scene inside a convenience store, focusing on the checkout counter area. A man in blue attire is seen diligently scanning items for customers. The shelves behind him are well-stocked with various products. Throughout the sequence of frames, there are no significant changes or movements apart from the continuous activity at the cashier's desk and the customer interacting with the clerk.", + "tokens_per_second": 6.375310581123721, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 79, + "inference_time": 30.1279034614563, + "cpu_usage": 37.0, + "cpu_core_utilization": [ + 32.4, + 49.7, + 41.4, + 24.4 + ], + "request_time": 30.14226269721985 + }, + "sample_part_013.mp4": { + "response": "The footage captures a scene inside a convenience store. The primary focus is on the counter area where two employees are busy attending to customers and organizing items on shelves behind them. 
Throughout the video, there are no significant changes in actions or positions of the individuals within the frame. Both staff members remain focused on their tasks with minimal interaction between each other and with any potential customers outside the captured view.", + "tokens_per_second": 6.371663181689808, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 79, + "inference_time": 30.188050508499146, + "cpu_usage": 38.7, + "cpu_core_utilization": [ + 25.3, + 24.5, + 51.5, + 53.1 + ], + "request_time": 30.204052448272705 + }, + "sample_part_014.mp4": { + "response": "In the video, we observe a convenience store with two individuals interacting. The first person is wearing blue and appears to be operating some sort of machine or device at the counter while holding an item in their hand. The second individual, dressed in yellow, stands behind the counter facing the customer. Throughout the sequence, there are no significant changes in actions or positions by either individual.", + "tokens_per_second": 6.242465092882211, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 76, + "inference_time": 29.934317111968994, + "cpu_usage": 36.6, + "cpu_core_utilization": [ + 43.4, + 16.4, + 27.8, + 58.6 + ], + "request_time": 29.949224710464478 + }, + "sample_part_015.mp4": { + "response": "The video shows a convenience store scene where a person wearing blue clothes is interacting with another individual. The setting includes shelves stocked with various items and the timestamp indicates that this event takes place on May 5, 2025 at around 2:33 PM. 
Throughout the sequence of images provided, there are no significant changes in actions or positions of the individuals involved; they remain engaged in their interaction near the counter area.", + "tokens_per_second": 6.8003448292571615, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 89, + "inference_time": 30.821718215942383, + "cpu_usage": 36.4, + "cpu_core_utilization": [ + 76.2, + 16.3, + 36.4, + 16.7 + ], + "request_time": 30.83825135231018 + }, + "sample_part_016.mp4": { + "response": "In the video, a man wearing black clothes enters a convenience store and approaches the counter. The staff member at the counter is busy taking an order from another customer who is standing nearby. As time passes, more customers begin to arrive one after another, causing a line to form in front of the counter. Meanwhile, the first person continues his interaction with the cashier behind the counter.", + "tokens_per_second": 6.328218691218011, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 78, + "inference_time": 30.102893352508545, + "cpu_usage": 36.5, + "cpu_core_utilization": [ + 24.4, + 22.4, + 15.9, + 83.2 + ], + "request_time": 30.116969108581543 + }, + "sample_part_017.mp4": { + "response": "A man in a blue shirt is standing at the counter of a convenience store, facing another person who appears to be making a transaction. Behind them are shelves stocked with various products. The scene captures the interaction between the two individuals and the items available for purchase within the store setting.", + "tokens_per_second": 5.366089856961263, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 59, + "inference_time": 28.72538161277771, + "cpu_usage": 37.3, + "cpu_core_utilization": [ + 35.5, + 35.8, + 41.0, + 36.9 + ], + "request_time": 28.740490198135376 + }, + "sample_part_018.mp4": { + "response": "In the video, we observe a convenience store scenario where numerous individuals are seated in rows. 
A person is consistently seen attending to them from behind a counter on the left side of the frame. This individual appears focused on handling items or transactions with those seated. Throughout the sequence, there are no significant changes in actions or positions of people; they remain seated and engaged without noticeable movement or interaction between themselves. The setting remains static as well, with all objects maintaining their original placement throughout the observed duration.", + "tokens_per_second": 7.211379440337794, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 100, + "inference_time": 31.647137880325317, + "cpu_usage": 36.4, + "cpu_core_utilization": [ + 47.5, + 23.3, + 51.7, + 23.0 + ], + "request_time": 31.661863088607788 + }, + "sample_part_019.mp4": { + "response": "A convenience store with a long counter filled with many goods. There are two people in blue shirts behind the counter, one of whom is busy handling something on the counter and talking to another person who walks up to them. The other man in a black shirt stands at the end of the counter looking around.", + "tokens_per_second": 5.540220507070647, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 62, + "inference_time": 28.936497449874878, + "cpu_usage": 37.0, + "cpu_core_utilization": [ + 33.0, + 54.7, + 43.2, + 17.3 + ], + "request_time": 28.950819730758667 + }, + "sample_part_020.mp4": { + "response": "In this footage, we observe a bustling convenience store environment. A customer is seen browsing through the items on display while another person appears to be assisting them at the counter. The scene captures various interactions between the customers and staff as they navigate the offerings available in the store. 
Throughout the video, there are no significant changes or movements apart from these ongoing transactions.", + "tokens_per_second": 6.098932177607684, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 73, + "inference_time": 29.66199231147766, + "cpu_usage": 36.9, + "cpu_core_utilization": [ + 19.0, + 31.2, + 54.7, + 42.7 + ], + "request_time": 29.67595863342285 + }, + "sample_part_021.mp4": { + "response": "A man in a blue shirt is sitting at the counter of a convenience store. There are many customers standing around him, waiting their turn to buy something. The man takes out some money from his pocket and hands it over to one of the customers who walks away with an item.", + "tokens_per_second": 5.256702476105206, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 57, + "inference_time": 28.576371431350708, + "cpu_usage": 37.3, + "cpu_core_utilization": [ + 41.7, + 37.5, + 52.9, + 17.2 + ], + "request_time": 28.591065645217896 + }, + "sample_part_022.mp4": { + "response": "A person in a blue shirt is bending down and picking up an item from the floor.", + "tokens_per_second": 2.3146754530784004, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 19, + "inference_time": 25.850362539291382, + "cpu_usage": 38.1, + "cpu_core_utilization": [ + 69.0, + 33.5, + 30.6, + 19.4 + ], + "request_time": 25.865066051483154 + }, + "sample_part_023.mp4": { + "response": "A man in a blue shirt walks into the frame. A woman is bent over behind the counter, and another person enters from the left side of the frame. 
The two people who enter walk to the right corner of the store and stand near the shelves.", + "tokens_per_second": 4.957384579566463, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 52, + "inference_time": 28.244088172912598, + "cpu_usage": 37.5, + "cpu_core_utilization": [ + 33.0, + 50.8, + 47.3, + 18.9 + ], + "request_time": 28.259082794189453 + }, + "sample_part_024.mp4": { + "response": "The video captures a scene inside a convenience store with several individuals wearing blue shirts, likely employees. The timestamp indicates it is 2:13 PM on May 5, 2025. Initially, the customers are lined up at the counter in an orderly manner while the staff members attend to them. As time progresses, one of the workers moves towards the shelves and starts organizing items. Throughout the sequence, there are no significant changes in the customer lines or their actions apart from minor movements as they wait for service.", + "tokens_per_second": 7.516056826941947, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 109, + "inference_time": 32.201098680496216, + "cpu_usage": 35.6, + "cpu_core_utilization": [ + 33.4, + 36.1, + 32.6, + 40.1 + ], + "request_time": 32.21550107002258 + }, + "sample_part_025.mp4": { + "response": "In the video, we see a convenience store with many customers inside. The employees are busy attending to their tasks and interacting with each other. One employee is seen taking something from behind the counter while another stands up in front of him.", + "tokens_per_second": 4.695294352922656, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 48, + "inference_time": 27.938767671585083, + "cpu_usage": 37.5, + "cpu_core_utilization": [ + 52.4, + 21.4, + 57.3, + 18.7 + ], + "request_time": 27.952880382537842 + }, + "sample_part_026.mp4": { + "response": "The video depicts a scene inside a convenience store where numerous individuals are gathered. 
A man in blue is consistently seen at the counter, diligently handling various tasks such as taking money and organizing items on display. Throughout the sequence, there is no significant change in the number of people present or their actions. The primary activity revolves around transactions being processed by the individual behind the counter amidst a static crowd of customers waiting for service.", + "tokens_per_second": 6.726536746978091, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 87, + "inference_time": 30.642447471618652, + "cpu_usage": 36.4, + "cpu_core_utilization": [ + 24.0, + 59.4, + 17.0, + 45.3 + ], + "request_time": 30.657237768173218 + }, + "sample_part_027.mp4": { + "response": "In the footage, a person wearing blue is seen standing at a counter inside a convenience store. Throughout the sequence of images, this individual appears to be engaged in some activity involving a cash register or similar equipment on the counter. The surrounding area includes shelves stocked with various items and another part of the store visible through an open doorway. There are no significant changes in actions or environment observed between these moments.", + "tokens_per_second": 6.553781610815721, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 83, + "inference_time": 30.343724727630615, + "cpu_usage": 36.5, + "cpu_core_utilization": [ + 32.0, + 56.7, + 38.4, + 18.8 + ], + "request_time": 30.358335494995117 + }, + "sample_part_028.mp4": { + "response": "The video depicts a busy scene at a convenience store. A customer is seen waiting in line, surrounded by numerous other customers and staff members who are all wearing blue shirts. The employees appear to be engaged in various tasks such as handling transactions, organizing items on the shelves, and interacting with each other. 
Throughout the sequence, there is no significant movement or change in actions from the individuals present except for slight adjustments made by some of them.", + "tokens_per_second": 6.759111802459959, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 88, + "inference_time": 30.698819160461426, + "cpu_usage": 36.3, + "cpu_core_utilization": [ + 31.2, + 20.1, + 76.9, + 17.1 + ], + "request_time": 30.714816331863403 + }, + "sample_part_029.mp4": { + "response": "In this footage, we see a bustling convenience store where several individuals are engaged in various activities. At the counter, one person is busy handling transactions using a cash register, while another individual stands nearby, possibly assisting or waiting for their turn. The scene captures multiple customers standing at the checkout line, each engrossed in their mobile phones. Their attention seems divided between engaging with their devices and interacting with the staff behind the counter.\n\nThe atmosphere appears to be quite hectic as these people wait patiently in line, indicating either a peak shopping time or an exceptionally popular product being sold. On the right side of the frame, there's a wall adorned with posters that add context to the environment but do not interact directly with the ongoing activities at the counter. Overall, the video provides a snapshot of everyday life within a retail setting, highlighting both customer behavior and operational aspects of the business.", + "tokens_per_second": 9.263214649215994, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 184, + "inference_time": 37.69326829910278, + "cpu_usage": 34.5, + "cpu_core_utilization": [ + 53.9, + 19.4, + 24.3, + 40.4 + ], + "request_time": 37.707125663757324 + }, + "sample_part_030.mp4": { + "response": "A man wearing a blue shirt is arranging items on the counter of a convenience store. 
A woman enters and approaches him, engaging in conversation while he continues to work at the cash register.", + "tokens_per_second": 4.063290188399236, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 39, + "inference_time": 27.340353965759277, + "cpu_usage": 37.5, + "cpu_core_utilization": [ + 50.1, + 20.2, + 61.0, + 18.7 + ], + "request_time": 27.354116201400757 + }, + "sample_part_031.mp4": { + "response": "In the video, a man wearing blue clothes is seen working at a counter in what appears to be a convenience store. He seems focused on his task as he interacts with various items and possibly customers or other staff members around him. Throughout the sequence of frames, there are no significant changes observed in the scene. The environment remains consistent with typical elements found in such stores like shelves stocked with products, and the man continues his work without any notable alterations in actions or interactions within the setting.", + "tokens_per_second": 7.1731396320969125, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 99, + "inference_time": 31.4871187210083, + "cpu_usage": 36.2, + "cpu_core_utilization": [ + 35.7, + 20.6, + 19.6, + 68.6 + ], + "request_time": 31.500434637069702 + }, + "sample_part_032.mp4": { + "response": "A person in a blue shirt is working behind the counter of a convenience store. The camera captures an overhead view showing many people standing on one side, waiting for their turn to purchase items from the shelves directly across from them. 
Throughout the video, there are no significant changes or movements among the individuals except for slight adjustments made by the cashier and minor head turns from those who are queued up.", + "tokens_per_second": 6.459775499170554, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 81, + "inference_time": 30.219013929367065, + "cpu_usage": 36.5, + "cpu_core_utilization": [ + 40.0, + 21.9, + 46.6, + 37.5 + ], + "request_time": 30.23338007926941 + }, + "sample_part_033.mp4": { + "response": "In the video, we see a woman in blue clothing working at a counter inside what appears to be a convenience store. She is handling some items on the table and interacting with customers who are lined up behind her. The scene takes place during daytime as indicated by the timestamp shown in the top left corner of the screen which reads 03-05-2025 Wed 02:51:48. As time progresses, more people join the queue, indicating that there may have been an increase in demand or perhaps a special event happening outside the frame. Throughout the sequence, both the workers and customers remain stationary, suggesting that this might not be particularly busy yet but rather preparing for increased activity.", + "tokens_per_second": 8.532561247112476, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 147, + "inference_time": 34.96639823913574, + "cpu_usage": 35.2, + "cpu_core_utilization": [ + 16.2, + 20.7, + 47.4, + 56.6 + ], + "request_time": 34.98181343078613 + }, + "sample_part_034.mp4": { + "response": "A group of individuals are standing in a narrow aisle, seemingly engaged in conversation. The person on the right side is holding something up to their face. As time progresses, one individual wearing blue clothing moves from left to right across the frame and interacts with another individual at the counter. 
This second individual also changes positions slightly while handling items behind the counter.", + "tokens_per_second": 6.093386425880963, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 73, + "inference_time": 29.669122457504272, + "cpu_usage": 36.6, + "cpu_core_utilization": [ + 47.7, + 65.3, + 17.1, + 16.5 + ], + "request_time": 29.68289351463318 + }, + "sample_part_035.mp4": { + "response": "The video depicts a scene inside a convenience store where multiple individuals are gathered. A person wearing blue is seen working at the counter, possibly handling transactions or assisting customers. The group of people appears to be waiting in line behind the counter, with their heads and upper bodies visible above the countertop filled with various items such as snacks and beverages. As time progresses, one individual leaves the frame while another enters from the left side. Throughout the sequence, there are no significant changes in actions or interactions among the remaining individuals; they continue standing closely together without noticeable movement other than slight shifts in posture.", + "tokens_per_second": 7.846984662180754, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 120, + "inference_time": 33.08664679527283, + "cpu_usage": 35.7, + "cpu_core_utilization": [ + 14.8, + 15.5, + 59.9, + 52.6 + ], + "request_time": 33.100858211517334 + }, + "sample_part_036.mp4": { + "response": "A man in a blue shirt enters the convenience store, approaches the counter, and interacts with the cashier. 
The customer appears to be making a purchase or inquiry while the cashier handles transactions at the register.", + "tokens_per_second": 4.356472577241336, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 43, + "inference_time": 27.591076135635376, + "cpu_usage": 37.4, + "cpu_core_utilization": [ + 20.9, + 70.7, + 18.3, + 40.0 + ], + "request_time": 27.60534977912903 + }, + "sample_part_037.mp4": { + "response": "The video depicts a convenience store scene where numerous individuals are seated on the floor. They appear to be engaged in various activities such as conversing with each other or looking at their phones. The setting is consistent throughout, showing no significant changes or movements from start to finish.", + "tokens_per_second": 5.138801059166309, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 55, + "inference_time": 28.532546758651733, + "cpu_usage": 37.3, + "cpu_core_utilization": [ + 47.6, + 38.0, + 17.4, + 46.2 + ], + "request_time": 28.54769778251648 + }, + "sample_part_038.mp4": { + "response": "A person is pushing a shopping cart with items inside. Another individual enters the scene and walks towards the counter, where they interact with someone behind it.", + "tokens_per_second": 3.425754264592105, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 31, + "inference_time": 26.758867979049683, + "cpu_usage": 37.7, + "cpu_core_utilization": [ + 23.2, + 51.9, + 42.6, + 33.3 + ], + "request_time": 26.77294373512268 + }, + "sample_part_039.mp4": { + "response": "In the video, a man wearing blue clothes is standing in front of a counter with many people. The customer interacts with the cashier and receives an item from her. 
After that, he leaves the store while carrying the received object.", + "tokens_per_second": 4.69602386319334, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 48, + "inference_time": 27.875003814697266, + "cpu_usage": 37.3, + "cpu_core_utilization": [ + 51.3, + 17.6, + 19.3, + 60.6 + ], + "request_time": 27.889028310775757 + }, + "sample_part_040.mp4": { + "response": "The video captures a sequence inside a convenience store. It begins with the camera focusing on numerous individuals seated in what appears to be a queue or waiting area within the store, all facing towards the left side of the frame. The setting includes shelves stocked with various products and an overall busy atmosphere indicative of typical retail operations.\n\nAs time progresses, one person is seen standing up from their seat near the counter area, possibly indicating they have finished interacting with a staff member at the register. This individual moves closer to the cashier station where another person seems to engage in some activity involving a computer or point-of-sale system. Throughout these events, there are no significant changes in the environment or additional people entering the scene; the primary action revolves around this single customer's interaction with the store staff.", + "tokens_per_second": 8.923322929883941, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 165, + "inference_time": 36.23391890525818, + "cpu_usage": 34.9, + "cpu_core_utilization": [ + 26.5, + 17.6, + 53.3, + 42.2 + ], + "request_time": 36.24828910827637 + }, + "sample_part_041.mp4": { + "response": "The footage captures a bustling convenience store scene. The primary focus is on the counter area where two employees are diligently working, one handling transactions and the other attending to tasks at the back of the counter. A long line of customers extends from the entrance towards the checkout counter, each waiting patiently for their turn. 
The video provides an overhead view that allows us to observe this organized queue system in action within the confines of the store.", + "tokens_per_second": 6.836469983622892, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 90, + "inference_time": 31.025818347930908, + "cpu_usage": 36.3, + "cpu_core_utilization": [ + 30.2, + 49.8, + 43.0, + 22.1 + ], + "request_time": 31.04036808013916 + }, + "sample_part_042.mp4": { + "response": "A man in a blue shirt is standing behind the counter of a convenience store. He appears to be talking on his phone and then looks up from it. There are many people sitting in chairs, facing him. They seem to have been waiting for some time as they look impatient. The scene takes place during daytime hours.", + "tokens_per_second": 5.799046418435553, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 67, + "inference_time": 29.234259366989136, + "cpu_usage": 36.6, + "cpu_core_utilization": [ + 52.7, + 42.9, + 16.9, + 34.0 + ], + "request_time": 29.250171184539795 + }, + "sample_part_043.mp4": { + "response": "The video shows a convenience store scenario with two individuals. One person is wearing blue clothes and the other one has yellow clothes. The individual in blue is consistently seen handling items on the counter, presumably preparing or organizing them for sale. Meanwhile, the person dressed in yellow enters from the left side of the frame and starts interacting with the merchandise at the same time as the first person. 
As they work together near the front area of the store, various products are visible around them, indicating that this might be an interaction related to restocking or customer service tasks within the retail environment.", + "tokens_per_second": 7.766284842271324, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 117, + "inference_time": 32.91955852508545, + "cpu_usage": 35.7, + "cpu_core_utilization": [ + 16.6, + 43.3, + 58.1, + 24.8 + ], + "request_time": 32.93333029747009 + }, + "sample_part_044.mp4": { + "response": "A man wearing a blue shirt is standing in front of the counter. There are many people sitting on chairs and benches behind him, most of them looking at their mobile phones. The store is crowded with customers inside.", + "tokens_per_second": 4.5631272946545245, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 46, + "inference_time": 27.838181972503662, + "cpu_usage": 37.3, + "cpu_core_utilization": [ + 25.7, + 32.6, + 18.2, + 72.5 + ], + "request_time": 27.852051258087158 + }, + "sample_part_045.mp4": { + "response": "The video shows a convenience store with several customers inside. The scene is captured from the top, providing an overhead view of the situation. There are numerous people in line at the counter, indicating that they are waiting to make purchases or receive service. A person wearing blue clothing appears to be working behind the counter, possibly as a store employee attending to customer needs and transactions. 
Throughout the sequence, there are no significant changes observed; the individuals maintain their positions relative to each other without noticeable movement or action variation.", + "tokens_per_second": 7.282653773730579, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 102, + "inference_time": 31.869735956192017, + "cpu_usage": 36.1, + "cpu_core_utilization": [ + 40.4, + 17.4, + 32.9, + 53.5 + ], + "request_time": 31.886656761169434 + }, + "sample_part_046.mp4": { + "response": "The video captures a sequence of events inside a convenience store. Initially, the scene is static with no visible activity. Shortly after, an individual wearing blue enters from the left and begins interacting with items on the counter. The person appears to be organizing or examining products placed there. Throughout the remainder of the clip, this individual continues their task at the counter without significant changes in behavior or position. Another person briefly walks by towards the end of the observed timeframe but does not engage with any specific actions within the frame.", + "tokens_per_second": 7.413608779975096, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 106, + "inference_time": 32.04073524475098, + "cpu_usage": 36.2, + "cpu_core_utilization": [ + 15.1, + 15.9, + 55.1, + 58.5 + ], + "request_time": 32.05498027801514 + }, + "sample_part_047.mp4": { + "response": "The video captures a scene inside a convenience store, presumably from a security camera given the timestamp and \"camera 04\" label. The footage is dated Wednesday, May 5th, 2025, at various times throughout the minute.\n\nInitially, we see an empty counter with several items on it before a person enters the frame and approaches the counter. This individual appears to be interacting with the products or perhaps preparing to make a purchase. 
As time progresses, this customer begins placing multiple shopping bags onto the counter, indicating they are either purchasing many items or returning something that needs to be restocked. Each bag contains different types of goods; some appear to contain snacks while others have larger packages possibly containing household essentials or other retail items.\n\nThroughout the sequence, there are no significant changes in the environment apart from the addition of these bags which gradually accumulate on the counter. No interactions between customers or staff members can be seen within this segment of the video as all focus remains solely on the actions involving the placement of the bags.", + "tokens_per_second": 9.744964420585076, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 217, + "inference_time": 40.15877413749695, + "cpu_usage": 34.8, + "cpu_core_utilization": [ + 16.9, + 39.2, + 15.3, + 67.8 + ], + "request_time": 40.172688484191895 + }, + "sample_part_048.mp4": { + "response": "A person is standing in front of the counter. The other people are sitting on chairs, with some holding mobile phones and others looking at items on shelves or talking to each other. A man wearing a blue shirt walks into frame from behind the store, turns around, and begins organizing products on the shelf.", + "tokens_per_second": 5.534022820709767, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 62, + "inference_time": 28.964895486831665, + "cpu_usage": 36.9, + "cpu_core_utilization": [ + 64.7, + 22.2, + 29.9, + 30.5 + ], + "request_time": 28.979894876480103 + }, + "sample_part_049.mp4": { + "response": "The video captures a scene inside a convenience store where numerous customers are standing in line. The footage is taken from an overhead perspective, providing a clear view of the entire queue extending down the length of the store. Throughout the sequence, individuals can be seen patiently waiting their turn to make purchases. 
There are no significant changes or actions occurring apart from these steady lines of people awaiting service at the checkout counters.", + "tokens_per_second": 6.507467905304537, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 82, + "inference_time": 30.294554233551025, + "cpu_usage": 36.6, + "cpu_core_utilization": [ + 53.7, + 28.6, + 33.4, + 30.8 + ], + "request_time": 30.308894634246826 + }, + "sample_part_050.mp4": { + "response": "In the video, a person is seen sitting in front of an escalator. The camera captures various angles and movements as time progresses.", + "tokens_per_second": 3.2541179562626197, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 29, + "inference_time": 26.652374744415283, + "cpu_usage": 37.9, + "cpu_core_utilization": [ + 48.0, + 17.9, + 20.3, + 65.1 + ], + "request_time": 26.667096853256226 + }, + "sample_part_051.mp4": { + "response": "In this convenience store, a customer is waiting in line. The employee behind the counter begins to pack items into boxes for her customers one by one as they approach the checkout area.", + "tokens_per_second": 3.912398850108666, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 37, + "inference_time": 27.17803382873535, + "cpu_usage": 37.2, + "cpu_core_utilization": [ + 62.7, + 17.9, + 17.4, + 50.5 + ], + "request_time": 27.19367265701294 + }, + "sample_part_052.mp4": { + "response": "In this convenience store, a customer is seen repeatedly attempting to buy something from the cashier. The transaction appears to be taking place in front of several other customers who are standing close together, suggesting that there may be limited space or queueing at the time. Throughout the sequence, the cashier interacts with both the main customer and another person near them, possibly assisting with transactions or providing information. 
Despite multiple attempts by the primary customer to make a purchase, no significant changes occur within their immediate surroundings.", + "tokens_per_second": 7.248168780638238, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 101, + "inference_time": 31.688145875930786, + "cpu_usage": 36.1, + "cpu_core_utilization": [ + 60.1, + 41.3, + 16.2, + 26.8 + ], + "request_time": 31.70526361465454 + }, + "sample_part_053.mp4": { + "response": "In the video, a woman wearing blue is working in a convenience store. She stands near the counter and interacts with customers or handles transactions at the register. The scene shows several seats where people are seated, possibly waiting for their turn to make purchases or simply resting while shopping. Throughout the sequence of images, there are no significant changes observed apart from slight movements indicating ongoing activities within the store environment.", + "tokens_per_second": 6.420566489349132, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 80, + "inference_time": 30.21658205986023, + "cpu_usage": 36.5, + "cpu_core_utilization": [ + 46.5, + 21.6, + 59.6, + 18.1 + ], + "request_time": 30.230469465255737 + }, + "sample_part_054.mp4": { + "response": "A man in a blue shirt is working at the counter of a convenience store. There are many people standing on both sides of him, waiting for their turn to make purchases. The customers appear to be queuing patiently and are mostly looking down or away from the camera.", + "tokens_per_second": 5.19902103533599, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 56, + "inference_time": 28.48878502845764, + "cpu_usage": 37.6, + "cpu_core_utilization": [ + 49.0, + 34.0, + 48.4, + 19.0 + ], + "request_time": 28.502393007278442 + }, + "sample_part_055.mp4": { + "response": "In this 1-minute video clip, we see a convenience store with customers standing in line. 
The main focus is on the interaction between the employees and the customers at the counter. One employee wearing blue attire appears to be handling transactions or assisting the customer while another person moves around behind the counter. There are no significant changes throughout the sequence as the scene remains consistent, depicting routine activities within the store during busy hours.", + "tokens_per_second": 6.72473013041542, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 87, + "inference_time": 30.703668117523193, + "cpu_usage": 36.5, + "cpu_core_utilization": [ + 35.2, + 45.0, + 37.4, + 28.4 + ], + "request_time": 30.717674016952515 + }, + "sample_part_056.mp4": { + "response": "A man wearing a blue shirt is at the counter of a convenience store. He appears to be ringing up items and interacting with customers who are lined up in front of him, waiting for their purchases to be completed. The scene takes place during business hours as indicated by the timestamp on the video.", + "tokens_per_second": 5.483718629411901, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 61, + "inference_time": 28.888518810272217, + "cpu_usage": 37.1, + "cpu_core_utilization": [ + 27.1, + 19.3, + 43.8, + 58.1 + ], + "request_time": 28.903250455856323 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/minicpm/20250821_002447/Clips_60s.json b/API_Transformers/output_0821/minicpm/20250821_002447/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..d16534b12e5e842a280875a8346059c78500fcfa --- /dev/null +++ b/API_Transformers/output_0821/minicpm/20250821_002447/Clips_60s.json @@ -0,0 +1,92 @@ +{ + "sample_part_001.mp4": { + "response": "In the video, a woman wearing blue clothes is standing in front of an escalator. She picks up items from the shelves and places them into her shopping bag. 
Another person appears at the bottom left corner of the frame, seemingly trying to get something out of the store. The scene takes place during daytime inside a building with bright lighting conditions.", + "tokens_per_second": 5.985183148455991, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 70, + "inference_time": 33.24836874008179, + "cpu_usage": 37.3, + "cpu_core_utilization": [ + 37.0, + 39.5, + 36.4, + 36.2 + ], + "request_time": 33.30821442604065 + }, + "sample_part_002.mp4": { + "response": "A man is seen wearing a blue shirt and black pants. He bends over to pick something up from the floor, then walks away with it in his hand.", + "tokens_per_second": 3.7285647057248625, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 34, + "inference_time": 29.67774724960327, + "cpu_usage": 50.6, + "cpu_core_utilization": [ + 56.4, + 62.1, + 35.5, + 48.3 + ], + "request_time": 29.69798707962036 + }, + "sample_part_003.mp4": { + "response": "In the footage, a person wearing blue clothes is seen arranging hats on a rack in what appears to be a convenience store. The individual methodically places each hat neatly onto the display stand. As the video progresses, various types of hats become visible as they are systematically organized. Throughout this process, no other individuals enter or leave the scene, and there are no significant changes observed in the environment or background elements such as posters on the wall.", + "tokens_per_second": 6.963268555022767, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 92, + "inference_time": 31.193055629730225, + "cpu_usage": 38.1, + "cpu_core_utilization": [ + 30.8, + 32.9, + 49.7, + 38.7 + ], + "request_time": 31.21116042137146 + }, + "sample_part_004.mp4": { + "response": "In the video, a man is seen cleaning an umbrella rack in a store. He starts by picking up items from various locations on the rack and then places them back down neatly. 
The process continues as he methodically organizes the umbrellas, ensuring they are aligned properly. His actions appear to be focused and deliberate, suggesting that he takes pride in his work. Throughout the sequence, there are no other individuals present in the frame, allowing for a clear view of his task.", + "tokens_per_second": 7.242406371874894, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 100, + "inference_time": 31.552570343017578, + "cpu_usage": 36.5, + "cpu_core_utilization": [ + 47.2, + 19.7, + 61.2, + 17.9 + ], + "request_time": 31.567636013031006 + }, + "sample_part_005.mp4": { + "response": "The video shows a person wearing a blue shirt and cap working at the counter of a convenience store. The individual is handling various items, possibly organizing or preparing them for sale. Throughout the sequence, there are no significant changes in the actions of the person or the environment around the counter area.", + "tokens_per_second": 5.385284609810389, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 59, + "inference_time": 28.649865865707397, + "cpu_usage": 37.3, + "cpu_core_utilization": [ + 20.6, + 21.4, + 90.0, + 16.9 + ], + "request_time": 28.664432048797607 + }, + "sample_part_006.mp4": { + "response": "The video shows a person wearing blue clothes and gloves placing hats on the rack in a convenience store. 
The scene takes place at 10:24 PM, as indicated by the timestamps visible throughout the footage.", + "tokens_per_second": 4.504682210102835, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 45, + "inference_time": 27.6962833404541, + "cpu_usage": 37.4, + "cpu_core_utilization": [ + 27.3, + 19.3, + 52.1, + 50.8 + ], + "request_time": 27.711094617843628 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/minicpm/20250821_010040/Clips_60s.json b/API_Transformers/output_0821/minicpm/20250821_010040/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..2aba616269c1cacd1639d74e90dbc272afd6e462 --- /dev/null +++ b/API_Transformers/output_0821/minicpm/20250821_010040/Clips_60s.json @@ -0,0 +1,767 @@ +{ + "sample_part_001.mp4": { + "response": "The video depicts a crowded convenience store where several individuals are interacting. One person, wearing blue clothing and gloves, is seen reaching for an item on the shelf while others move around in the background. The scene suggests a busy environment with people possibly shopping or grabbing items quickly.", + "tokens_per_second": 5.106639419779758, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 56, + "inference_time": 33.65660357475281, + "cpu_usage": 16.3, + "cpu_core_utilization": [ + 16.0, + 13.4, + 20.9, + 14.9 + ], + "request_time": 33.771279096603394 + }, + "sample_part_002.mp4": { + "response": "The video shows a convenience store with various items on display. A woman wearing blue clothing is seen arranging and handling the merchandise inside the store. She moves around, picking up different objects from shelves and placing them in specific locations within the store. 
The focus remains on her actions as she organizes the products throughout the duration of the clip.", + "tokens_per_second": 5.976602351659928, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 69, + "inference_time": 29.21415114402771, + "cpu_usage": 36.9, + "cpu_core_utilization": [ + 52.9, + 47.3, + 16.7, + 30.9 + ], + "request_time": 29.228939533233643 + }, + "sample_part_003.mp4": { + "response": "The video shows a convenience store with various items on display. A person in blue clothing is seen arranging and putting away the displayed hats, appearing to be organizing or cleaning up the area. The timestamp indicates that this activity occurs over a span of 1 minute.", + "tokens_per_second": 5.11842779774044, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 54, + "inference_time": 28.203431129455566, + "cpu_usage": 37.6, + "cpu_core_utilization": [ + 51.0, + 23.2, + 57.8, + 18.2 + ], + "request_time": 28.21802854537964 + }, + "sample_part_004.mp4": { + "response": "The video depicts a convenience store scene where an employee is diligently cleaning the shelves. The individual, dressed in blue attire and wearing a hat, moves systematically along the rows of products to wipe them down with a yellow cloth. Throughout the sequence, there are no customers present in the frame, emphasizing the focus on the task at hand. The setting appears well-lit, likely by indoor lighting common in retail environments, ensuring that all areas being cleaned are clearly visible.", + "tokens_per_second": 7.094428465980155, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 96, + "inference_time": 31.255270957946777, + "cpu_usage": 36.1, + "cpu_core_utilization": [ + 16.6, + 46.3, + 15.5, + 66.2 + ], + "request_time": 31.270313262939453 + }, + "sample_part_005.mp4": { + "response": "In the video, a person wearing blue clothes is seen placing items on display in what appears to be an indoor store. 
The setting looks like a convenience or retail shop with various products visible around them. The individual seems focused on arranging or adding merchandise to the shelves.", + "tokens_per_second": 5.149733378245225, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 55, + "inference_time": 28.503536462783813, + "cpu_usage": 37.4, + "cpu_core_utilization": [ + 19.1, + 93.3, + 16.5, + 20.5 + ], + "request_time": 28.51807713508606 + }, + "sample_part_006.mp4": { + "response": "A person in a blue shirt picks up an item from the counter.", + "tokens_per_second": 1.8942455900034438, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 15, + "inference_time": 25.680493593215942, + "cpu_usage": 37.9, + "cpu_core_utilization": [ + 27.3, + 31.8, + 57.5, + 35.1 + ], + "request_time": 25.69572949409485 + }, + "sample_part_007.mp4": { + "response": "A woman in a blue shirt enters the convenience store and starts to organize items on display.", + "tokens_per_second": 2.3168022945047397, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 19, + "inference_time": 25.99376630783081, + "cpu_usage": 37.8, + "cpu_core_utilization": [ + 22.5, + 61.1, + 17.4, + 50.2 + ], + "request_time": 26.00737762451172 + }, + "sample_part_008.mp4": { + "response": "A woman enters the convenience store, looking around. She picks up an item and examines it before putting it back on the shelf. 
The camera captures her actions from a top-down perspective.", + "tokens_per_second": 4.072920119323832, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 39, + "inference_time": 27.38187527656555, + "cpu_usage": 37.7, + "cpu_core_utilization": [ + 26.2, + 41.3, + 45.1, + 38.4 + ], + "request_time": 27.395817041397095 + }, + "sample_part_009.mp4": { + "response": "A man in a blue shirt is arranging hats on the shelves of a convenience store.", + "tokens_per_second": 2.3167189804012382, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 19, + "inference_time": 26.005106687545776, + "cpu_usage": 38.1, + "cpu_core_utilization": [ + 21.9, + 49.8, + 55.8, + 25.0 + ], + "request_time": 26.019869565963745 + }, + "sample_part_010.mp4": { + "response": "In the video, a person wearing a blue shirt is seen placing an item on display. The setting appears to be inside a convenience store with various items hanging and displayed on racks.", + "tokens_per_second": 3.995493400475914, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 38, + "inference_time": 27.31991672515869, + "cpu_usage": 37.8, + "cpu_core_utilization": [ + 24.0, + 30.3, + 46.3, + 50.6 + ], + "request_time": 27.33413791656494 + }, + "sample_part_011.mp4": { + "response": "The video depicts a person in blue clothing organizing items on the shelves of a convenience store. The individual is seen placing objects into different sections, adjusting their positions to ensure everything is neatly arranged. Throughout the sequence, there are no other people or significant changes occurring within the frame. 
The primary activity revolves around the meticulous task of shelf organization by this single person.", + "tokens_per_second": 6.202907021110835, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 75, + "inference_time": 29.863797903060913, + "cpu_usage": 36.8, + "cpu_core_utilization": [ + 55.8, + 29.8, + 44.5, + 17.1 + ], + "request_time": 29.87814235687256 + }, + "sample_part_012.mp4": { + "response": "In the footage, a person clad in blue is seen methodically wiping down various items within what appears to be an indoor retail environment. The setting includes shelving units stocked with assorted merchandise and large posters or advertisements adorning the walls. Throughout the sequence, the individual's actions remain consistent as they systematically clean each item on display, maintaining focus on ensuring all surfaces are spotless.", + "tokens_per_second": 6.5597487765640565, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 83, + "inference_time": 30.373464345932007, + "cpu_usage": 36.6, + "cpu_core_utilization": [ + 25.1, + 23.9, + 51.7, + 45.5 + ], + "request_time": 30.388197422027588 + }, + "sample_part_013.mp4": { + "response": "In the video, a person is seen cleaning the glass window of what appears to be a convenience store. The individual starts by holding up a yellow cloth and begins wiping the surface methodically from top to bottom. As time progresses, they continue their task with consistent motion, ensuring every part of the window is attended to. 
Throughout this process, there are no other significant actions or interactions observed in the surroundings.", + "tokens_per_second": 6.599854347514273, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 84, + "inference_time": 30.522512674331665, + "cpu_usage": 36.9, + "cpu_core_utilization": [ + 68.2, + 18.6, + 42.7, + 18.1 + ], + "request_time": 30.536853790283203 + }, + "sample_part_014.mp4": { + "response": "The video shows a person wearing blue clothing inside a convenience store. The individual is seen reaching up towards the top of an extensive display rack filled with various items, including hats and other merchandise. Throughout the sequence, there are no significant changes in the scene; the focus remains on the action of the person as they interact with the products on the shelf.", + "tokens_per_second": 6.004141023516783, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 71, + "inference_time": 29.609714031219482, + "cpu_usage": 36.9, + "cpu_core_utilization": [ + 54.0, + 34.3, + 33.5, + 25.6 + ], + "request_time": 29.624605417251587 + }, + "sample_part_015.mp4": { + "response": "In the video, a man is standing in front of an open convenience store. He is wearing a blue shirt and appears to be engaged with something on his phone or camera that he has extended out towards one side of the shop display rack filled with various items such as caps and other small goods. Throughout the sequence of images, there are no significant changes observed in either the position of the man or the arrangement of items within the store. 
The scene remains consistent without any notable actions taking place apart from the man's interaction with whatever object he is holding up.", + "tokens_per_second": 7.644809074843396, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 113, + "inference_time": 32.58644247055054, + "cpu_usage": 35.7, + "cpu_core_utilization": [ + 26.1, + 37.3, + 48.7, + 30.7 + ], + "request_time": 32.600977659225464 + }, + "sample_part_016.mp4": { + "response": "A person is pushing a shopping cart in the convenience store. The items on display are hanging from the ceiling, and there is an escalator to the right side of the frame.", + "tokens_per_second": 3.917112300612553, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 37, + "inference_time": 27.219703674316406, + "cpu_usage": 37.9, + "cpu_core_utilization": [ + 26.2, + 35.0, + 54.6, + 35.9 + ], + "request_time": 27.23397445678711 + }, + "sample_part_017.mp4": { + "response": "A man in a blue shirt is standing at the counter of a convenience store. He appears to be placing an item on the shelf, which seems to be some sort of merchandise or product. The video captures this action from different angles and distances as he continues his task.", + "tokens_per_second": 5.140461789786328, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 55, + "inference_time": 28.39331316947937, + "cpu_usage": 36.8, + "cpu_core_utilization": [ + 30.2, + 35.9, + 49.2, + 32.1 + ], + "request_time": 28.40793776512146 + }, + "sample_part_018.mp4": { + "response": "The video depicts a person in blue clothing who is interacting with items on display at what appears to be a store. The sequence begins with the individual reaching out towards an object, possibly examining or selecting it from the assortment of goods arranged vertically along the wall. As the frames progress, there are slight changes in the position and angle of the hands and arms belonging to the person, indicating ongoing engagement with the merchandise. 
Throughout the observed duration, the environment remains consistent, focusing solely on the interaction between the person and the displayed products without any significant alterations in the scene or introduction of new elements.", + "tokens_per_second": 7.796685733470381, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 118, + "inference_time": 32.85233020782471, + "cpu_usage": 36.0, + "cpu_core_utilization": [ + 47.5, + 17.9, + 27.3, + 51.2 + ], + "request_time": 32.866013526916504 + }, + "sample_part_019.mp4": { + "response": "In this 1-minute footage, we observe a convenience store setting where two women are present. The woman on the left is initially seen placing items into her blue bag while standing next to an escalator. As time progresses, she moves towards the center of the frame and begins handling various hats displayed in front of her. Meanwhile, another woman appears from behind the display case and stands beside the first woman. Together, they seem to be organizing or arranging the hats for sale. Throughout the sequence, there are no significant changes in their actions or interactions with each other; both remain focused on managing the hat display area.", + "tokens_per_second": 8.04906132671884, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 127, + "inference_time": 33.56799626350403, + "cpu_usage": 35.7, + "cpu_core_utilization": [ + 28.8, + 35.7, + 39.0, + 39.3 + ], + "request_time": 33.58272337913513 + }, + "sample_part_020.mp4": { + "response": "The video shows a convenience store with an assortment of items on display. A person wearing blue clothing is seen interacting with the products, appearing to examine them closely. The individual seems to be browsing or possibly considering purchasing something from the shelves. 
Throughout the sequence, there are no significant changes in the environment or introduction of new elements, and the focus remains on this single shopper's actions within the store setting.", + "tokens_per_second": 6.551982232700224, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 83, + "inference_time": 30.410321474075317, + "cpu_usage": 36.7, + "cpu_core_utilization": [ + 35.2, + 27.6, + 62.1, + 22.0 + ], + "request_time": 30.424530267715454 + }, + "sample_part_021.mp4": { + "response": "In this convenience store, a man wearing blue clothes is seen searching for something inside the shelves. He bends down and moves items around in an attempt to find what he is looking for. The video captures his continuous effort as he shifts different objects on the shelf.", + "tokens_per_second": 5.068557863754886, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 54, + "inference_time": 28.43730902671814, + "cpu_usage": 37.4, + "cpu_core_utilization": [ + 24.8, + 47.0, + 16.6, + 61.0 + ], + "request_time": 28.451560258865356 + }, + "sample_part_022.mp4": { + "response": "In the video, a person is seen approaching a convenience store. They pause for a moment and then proceed to browse through various items displayed on shelves within the store. The individual appears focused on examining the products closely before eventually making a selection.", + "tokens_per_second": 4.765763711580632, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 49, + "inference_time": 27.97677779197693, + "cpu_usage": 37.6, + "cpu_core_utilization": [ + 43.1, + 34.8, + 55.9, + 16.7 + ], + "request_time": 27.991510152816772 + }, + "sample_part_023.mp4": { + "response": "In this 1-minute convenience store video clip, a person wearing blue clothing enters the frame and proceeds to climb up an aisle filled with hats. The individual manages to get their feet on top of the shelves holding the various items for sale. 
Despite several attempts to pull themselves up higher, they are unable to reach the upper part of the shelf where other products can be found. After multiple efforts to ascend the height of the hat display, the person finally succeeds in reaching the top and retrieves an item from there.", + "tokens_per_second": 7.483257212457778, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 108, + "inference_time": 32.22531199455261, + "cpu_usage": 36.1, + "cpu_core_utilization": [ + 26.1, + 27.6, + 47.6, + 43.3 + ], + "request_time": 32.239612102508545 + }, + "sample_part_024.mp4": { + "response": "A woman in blue clothes is leaning on the wall of a convenience store. She reaches down to pick up an item and then places it back on the shelf.", + "tokens_per_second": 3.676425320411625, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 34, + "inference_time": 27.02277135848999, + "cpu_usage": 38.5, + "cpu_core_utilization": [ + 40.1, + 28.5, + 57.3, + 28.2 + ], + "request_time": 27.037601470947266 + }, + "sample_part_025.mp4": { + "response": "In this video, a man is seen climbing into the top of an escalator. He appears to be wearing blue clothing and carrying some items with him. The scene takes place in what looks like a public transportation area, possibly near or inside an elevator shaft.", + "tokens_per_second": 5.01668206513031, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 53, + "inference_time": 28.364001035690308, + "cpu_usage": 37.5, + "cpu_core_utilization": [ + 29.8, + 41.1, + 41.8, + 37.3 + ], + "request_time": 28.378294467926025 + }, + "sample_part_026.mp4": { + "response": "In the video, a person is seen wearing a blue shirt and cap. They are standing in front of a display rack filled with various items such as hats and other merchandise. The individual appears to be interacting with these products by either examining them or organizing them on the shelf. 
Throughout the sequence, there are no significant changes in the environment or additional people entering the frame.", + "tokens_per_second": 6.198040985676184, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 75, + "inference_time": 29.83291721343994, + "cpu_usage": 37.0, + "cpu_core_utilization": [ + 26.8, + 32.5, + 30.5, + 58.1 + ], + "request_time": 29.847522020339966 + }, + "sample_part_027.mp4": { + "response": "The footage captures a moment inside a convenience store where an individual is interacting with the products on display. The person appears to be either browsing or selecting items from a rack filled with various goods, predominantly hats and caps of different colors and designs. Throughout the sequence, there are no significant changes in the environment or introduction of new objects. The primary activity revolves around this single shopper's engagement with the merchandise available within their reach.", + "tokens_per_second": 6.7633887422454855, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 88, + "inference_time": 30.76609969139099, + "cpu_usage": 36.5, + "cpu_core_utilization": [ + 64.3, + 25.4, + 39.5, + 16.6 + ], + "request_time": 30.781466007232666 + }, + "sample_part_028.mp4": { + "response": "The footage captures a person in blue attire meticulously organizing and arranging various items on shelves within a convenience store. The setting is characterized by the presence of numerous hats displayed prominently, along with other miscellaneous merchandise. 
Throughout the sequence, there are no significant changes or movements aside from the individual's actions as they handle different objects on the shelving unit.", + "tokens_per_second": 6.241116142632914, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 76, + "inference_time": 29.900090217590332, + "cpu_usage": 36.9, + "cpu_core_utilization": [ + 52.1, + 24.6, + 27.8, + 43.1 + ], + "request_time": 29.91452932357788 + }, + "sample_part_029.mp4": { + "response": "In the video, a person wearing blue clothes is seen reaching into a rack of hats. They take out several hats and place them on top of another stack of hats in front of them. The individual continues to select more hats from the display, adding them to the pile they are already managing.", + "tokens_per_second": 5.427872993526753, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 60, + "inference_time": 28.89626455307007, + "cpu_usage": 36.8, + "cpu_core_utilization": [ + 58.1, + 25.7, + 27.4, + 36.1 + ], + "request_time": 28.911091804504395 + }, + "sample_part_030.mp4": { + "response": "The footage captures a woman in blue attire inside a convenience store. Initially, she is seen interacting with the cashier area of the counter. She then proceeds to select items from various shelves and places them into her bag. Throughout the sequence, there are no other individuals visible in the frame apart from this one woman who appears focused on completing her transaction without any notable interactions or movements outside the scope of managing her purchase at the checkout.", + "tokens_per_second": 6.759735939024082, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 88, + "inference_time": 30.848559379577637, + "cpu_usage": 36.6, + "cpu_core_utilization": [ + 41.3, + 20.6, + 31.9, + 52.4 + ], + "request_time": 30.863301038742065 + }, + "sample_part_031.mp4": { + "response": "The video shows a person in a blue shirt inside a convenience store. 
They are standing behind the counter, interacting with something on it. The individual is consistently positioned throughout the sequence of images, and their actions appear to be focused on an activity at the counter without any significant changes or movements observed over time.", + "tokens_per_second": 5.539399356957513, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 62, + "inference_time": 28.922995567321777, + "cpu_usage": 37.0, + "cpu_core_utilization": [ + 36.2, + 29.1, + 66.1, + 16.6 + ], + "request_time": 28.938354969024658 + }, + "sample_part_032.mp4": { + "response": "A man in a blue shirt is working at the convenience store. He picks up items from shelves and places them into boxes on the counter.", + "tokens_per_second": 3.2576537369296608, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 29, + "inference_time": 26.580628395080566, + "cpu_usage": 38.1, + "cpu_core_utilization": [ + 48.7, + 36.2, + 31.6, + 36.0 + ], + "request_time": 26.598524808883667 + }, + "sample_part_033.mp4": { + "response": "In the video, a person wearing blue clothes is seen picking up an item from a shelf in what appears to be a convenience store. The individual bends down slightly and reaches for one of the items displayed on the top shelf. After successfully grabbing the object, they stand back up straight holding it in their hand. Throughout this sequence, there are no other significant movements or interactions observed within the frame.", + "tokens_per_second": 6.511908523166135, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 82, + "inference_time": 30.294382572174072, + "cpu_usage": 36.5, + "cpu_core_utilization": [ + 55.4, + 25.4, + 48.9, + 16.2 + ], + "request_time": 30.308947801589966 + }, + "sample_part_034.mp4": { + "response": "A person in a blue shirt is standing at the counter of a convenience store. They are reaching for an item on display and appear to be selecting or purchasing it. 
The items displayed include various merchandise such as hats, accessories, and possibly small goods typically found in a convenience store setting.", + "tokens_per_second": 5.322139736625115, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 58, + "inference_time": 28.59447956085205, + "cpu_usage": 37.4, + "cpu_core_utilization": [ + 41.1, + 43.3, + 45.5, + 19.4 + ], + "request_time": 28.608854293823242 + }, + "sample_part_035.mp4": { + "response": "A person in a blue shirt is seen picking up items from the shelf.", + "tokens_per_second": 2.001110574040108, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 16, + "inference_time": 25.772795915603638, + "cpu_usage": 38.4, + "cpu_core_utilization": [ + 35.6, + 36.8, + 32.8, + 48.4 + ], + "request_time": 25.78701138496399 + }, + "sample_part_036.mp4": { + "response": "In the video, a person wearing blue clothing is seen bending down in front of an upright rack filled with various items. The individual appears to be searching for something specific among the products displayed on the shelf.", + "tokens_per_second": 4.287825767821965, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 42, + "inference_time": 27.472771406173706, + "cpu_usage": 37.6, + "cpu_core_utilization": [ + 18.0, + 46.2, + 68.9, + 17.6 + ], + "request_time": 27.48766326904297 + }, + "sample_part_037.mp4": { + "response": "In this clip, a person is seen walking past the counter of a convenience store. The individual appears to be in motion and possibly heading towards or away from the entrance of the store. 
There are no other significant actions taking place within the frame during this segment of the video.", + "tokens_per_second": 5.201636019010958, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 56, + "inference_time": 28.49695110321045, + "cpu_usage": 37.4, + "cpu_core_utilization": [ + 36.0, + 34.1, + 41.3, + 38.1 + ], + "request_time": 28.511244535446167 + }, + "sample_part_038.mp4": { + "response": "In the footage, a person is seen moving around in front of an escalator. The individual appears to be either entering or exiting the frame from different positions on both sides. Throughout the sequence, they seem to be engaging with items displayed at the bottom of the store. Their actions suggest that they are browsing through various products available for sale.", + "tokens_per_second": 5.959308976482591, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 70, + "inference_time": 29.47262477874756, + "cpu_usage": 36.6, + "cpu_core_utilization": [ + 47.9, + 26.7, + 42.6, + 29.1 + ], + "request_time": 29.48682999610901 + }, + "sample_part_039.mp4": { + "response": "A man in blue clothes is climbing up a narrow aisle filled with products. He reaches the top and hangs on to hold himself there.", + "tokens_per_second": 3.3460227628996955, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 30, + "inference_time": 26.825403690338135, + "cpu_usage": 38.0, + "cpu_core_utilization": [ + 50.8, + 48.6, + 31.4, + 21.4 + ], + "request_time": 26.83967351913452 + }, + "sample_part_040.mp4": { + "response": "A man in a blue shirt enters the frame, bends down to pick up something from below. 
He then stands back up and continues to look at items on display.", + "tokens_per_second": 3.758188071762749, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 35, + "inference_time": 27.106918334960938, + "cpu_usage": 37.6, + "cpu_core_utilization": [ + 32.3, + 62.0, + 32.1, + 23.9 + ], + "request_time": 27.121721982955933 + }, + "sample_part_041.mp4": { + "response": "The video shows a person in blue clothing working at the counter of a convenience store. The individual is seen bending over and handling items on the counter throughout the sequence of frames.", + "tokens_per_second": 3.839752750295925, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 36, + "inference_time": 27.119013786315918, + "cpu_usage": 37.8, + "cpu_core_utilization": [ + 28.4, + 42.0, + 34.0, + 46.9 + ], + "request_time": 27.132707595825195 + }, + "sample_part_042.mp4": { + "response": "The video depicts a person in a blue shirt arranging items on the shelves of what appears to be a convenience store. The individual is focused on organizing or placing various goods, which include an assortment of products like bags and possibly other small merchandise. Throughout the sequence, there are no significant changes in the environment or introduction of new objects; the primary activity involves the continuous interaction between the person and the displayed items.", + "tokens_per_second": 6.5598252890181605, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 83, + "inference_time": 30.36869478225708, + "cpu_usage": 36.1, + "cpu_core_utilization": [ + 20.5, + 62.7, + 16.5, + 44.7 + ], + "request_time": 30.382946968078613 + }, + "sample_part_043.mp4": { + "response": "The video captures a man in blue clothing arranging items on the shelves of a convenience store. Initially, he is seen bending down and placing an item on the shelf. He then stands up to pick another one from below the counter. 
The timestamp indicates that this activity takes place on 03-05-2025 at 08:01:36.", + "tokens_per_second": 6.379927791197094, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 79, + "inference_time": 30.105485439300537, + "cpu_usage": 36.8, + "cpu_core_utilization": [ + 27.4, + 48.1, + 37.8, + 33.9 + ], + "request_time": 30.119589805603027 + }, + "sample_part_044.mp4": { + "response": "A man in a blue shirt and cap is seen bending down to pick up an item. He then stands back up, holding the item he picked up from below him.", + "tokens_per_second": 3.7502463323747084, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 35, + "inference_time": 27.07802414894104, + "cpu_usage": 48.6, + "cpu_core_utilization": [ + 82.7, + 37.0, + 40.9, + 33.6 + ], + "request_time": 27.09462571144104 + }, + "sample_part_045.mp4": { + "response": "A man in a blue shirt is arranging hats on the rack.", + "tokens_per_second": 1.8954757210352398, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 15, + "inference_time": 28.461366891860962, + "cpu_usage": 50.5, + "cpu_core_utilization": [ + 45.6, + 51.9, + 34.0, + 70.4 + ], + "request_time": 28.478434085845947 + }, + "sample_part_046.mp4": { + "response": "The video depicts a person in a blue shirt arranging items on the shelves of a convenience store. The individual is seen bending down, reaching for an item, and then placing it back onto the shelf. This action repeats multiple times as they continue to organize or restock the products available within this narrow retail space.", + "tokens_per_second": 5.655760454979962, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 64, + "inference_time": 29.068380117416382, + "cpu_usage": 36.7, + "cpu_core_utilization": [ + 19.4, + 41.5, + 59.0, + 27.0 + ], + "request_time": 29.083088636398315 + }, + "sample_part_047.mp4": { + "response": "The video shows a convenience store with various items displayed on shelves. 
Throughout the sequence, there are no visible actions or interactions taking place as all objects and their positions remain constant over time.", + "tokens_per_second": 3.9947178064700855, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 38, + "inference_time": 27.281035661697388, + "cpu_usage": 37.9, + "cpu_core_utilization": [ + 27.5, + 21.8, + 42.6, + 59.5 + ], + "request_time": 27.297154426574707 + }, + "sample_part_048.mp4": { + "response": "In the video, a person is seen climbing up to reach something on top of a display rack in a convenience store. The individual appears to be focused on accessing items that are positioned higher than their usual reach. As they climb, it becomes evident that this task requires some effort and physical movement.", + "tokens_per_second": 5.431023554612727, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 60, + "inference_time": 28.805902004241943, + "cpu_usage": 36.9, + "cpu_core_utilization": [ + 35.2, + 33.7, + 57.7, + 21.0 + ], + "request_time": 28.819944858551025 + }, + "sample_part_049.mp4": { + "response": "The video shows a convenience store with various items on display. A person enters the frame and walks towards the right side of the screen, passing by shelves filled with merchandise such as bags and other small goods. The individual appears to be browsing or looking at the products available in the store.", + "tokens_per_second": 5.315589822361752, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 58, + "inference_time": 28.63136577606201, + "cpu_usage": 44.5, + "cpu_core_utilization": [ + 27.1, + 55.7, + 35.4, + 59.8 + ], + "request_time": 28.64545965194702 + }, + "sample_part_050.mp4": { + "response": "In the video, a person is seen walking past a display rack filled with various items. 
The individual appears to be in motion and their actions are focused on passing by this particular area of the store.", + "tokens_per_second": 4.2201806034921, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 41, + "inference_time": 28.001408338546753, + "cpu_usage": 37.8, + "cpu_core_utilization": [ + 45.9, + 36.6, + 32.4, + 36.4 + ], + "request_time": 28.016364336013794 + }, + "sample_part_051.mp4": { + "response": "A man is seen walking into the frame. He walks up to a display of stuffed animals and starts picking them off, one by one. The items are then placed on top of each other in an orderly fashion.", + "tokens_per_second": 4.498009394398241, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 45, + "inference_time": 27.740373611450195, + "cpu_usage": 37.8, + "cpu_core_utilization": [ + 34.1, + 28.6, + 47.5, + 40.9 + ], + "request_time": 27.75620126724243 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/minicpm/20250821_034003/Clips_60s.json b/API_Transformers/output_0821/minicpm/20250821_034003/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..2fc040666f7c80044cf44627b68c264f7da96214 --- /dev/null +++ b/API_Transformers/output_0821/minicpm/20250821_034003/Clips_60s.json @@ -0,0 +1,242 @@ +{ + "video_part_001.mp4": { + "response": "The video takes place in a convenience store where multiple individuals are involved in various transactions and interactions. Initially, there is an exchange between the cashier and two customers at the counter. One customer wears black attire while the other dons grey clothing with a backpack. The scene progresses as another person enters from behind the shelves of goods.\n\nSubsequently, we observe that this individual picks up some items before returning to engage with the same customer who was initially standing next to the cashier. Meanwhile, the previously seen second customer has moved away from the counter area but soon returns. 
Throughout these exchanges, a man wearing blue comes into view and interacts briefly with one of the staff members.\n\nAs time moves forward, additional people enter the frame - one dressed in beige and others donning dark outfits. These newcomers appear to browse or interact with different sections within the store. In parallel, the original pair of customers continues their transaction; however, it becomes clear they eventually leave after completing their purchases.\n\nThroughout all activities, the background remains consistent with neatly arranged snack products displayed on the shelves lining both sides of the room.", + "tokens_per_second": 10.024019230028593, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 235, + "inference_time": 39.583118200302124, + "cpu_usage": 31.8, + "cpu_core_utilization": [ + 16.9, + 19.2, + 59.5, + 31.5 + ], + "request_time": 70.97838306427002 + }, + "video_part_002.mp4": { + "response": "A man in a black shirt is working at the counter. Another person wearing light blue walks up to him, and they start talking. A third individual dressed in dark clothing appears from behind them, holding an item that looks like money or some sort of document. The first two individuals continue their conversation with the cashier while the third one moves closer to them.", + "tokens_per_second": 6.118521643289556, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 73, + "inference_time": 28.183727741241455, + "cpu_usage": 33.2, + "cpu_core_utilization": [ + 57.3, + 19.5, + 10.5, + 45.5 + ], + "request_time": 28.19832706451416 + }, + "video_part_003.mp4": { + "response": "The video captures a sequence of events inside a convenience store. Initially, two customers are at the counter: one is purchasing items and another appears to be waiting for their turn or observing. The cashier assists with transactions while occasionally looking up. 
As time progresses, additional people enter the scene; they either browse products on the shelves or wait near the entrance. Throughout the clip, there's consistent activity around the checkout area including handling money, scanning goods, and general customer interactions typical in such retail environments.", + "tokens_per_second": 7.325785835893888, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 103, + "inference_time": 30.34404945373535, + "cpu_usage": 33.2, + "cpu_core_utilization": [ + 32.8, + 46.1, + 30.7, + 23.2 + ], + "request_time": 30.35836410522461 + }, + "video_part_004.mp4": { + "response": "In this convenience store, the cashier is interacting with customers. The first customer pays for his items and leaves afterward. Another man wearing a blue shirt walks in and picks up some products from the shelves before approaching the counter to pay for them. After paying, he also exits the scene.", + "tokens_per_second": 5.483056762285139, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 61, + "inference_time": 27.36730432510376, + "cpu_usage": 33.6, + "cpu_core_utilization": [ + 62.6, + 13.2, + 42.9, + 15.6 + ], + "request_time": 27.38129711151123 + }, + "video_part_005.mp4": { + "response": "In this convenience store scenario, a customer in white attire and black backpack approaches the counter with his purchase. The cashier is busy handling transactions on the computer while assisting him. Another individual in blue stands by nearby shelves stocked with snacks as they engage in conversation. Meanwhile, an employee at the back of the shop retrieves items from behind the counters for sale. Throughout their interaction, various products are placed onto the counter, indicating active sales taking place. 
Eventually, the two individuals exit the scene, leaving only the cashier to attend to other tasks until the end of the recorded segment.", + "tokens_per_second": 7.8524871607145865, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 120, + "inference_time": 31.568355798721313, + "cpu_usage": 32.7, + "cpu_core_utilization": [ + 13.2, + 42.4, + 47.5, + 27.5 + ], + "request_time": 31.582468032836914 + }, + "video_part_006.mp4": { + "response": "A man in a blue shirt is at the counter of a convenience store, handling items and making payments. A customer in a light blue shirt stands beside him, reaching for products on the shelf or table. The scene involves transactions as various customers interact with the cashier and browse the shelves stocked with snacks and other goods.", + "tokens_per_second": 5.751292635048318, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 66, + "inference_time": 27.837866067886353, + "cpu_usage": 33.5, + "cpu_core_utilization": [ + 24.6, + 32.6, + 13.2, + 63.8 + ], + "request_time": 27.856117725372314 + }, + "video_part_007.mp4": { + "response": "A woman in a red sweater enters the store, carrying two blue shopping bags. A man wearing a light blue shirt stands at the checkout counter with his hands on it and scans items using a barcode scanner. The cashier behind the counter rings up an item for sale to the customer. Another person walks by holding some objects in their hand. The camera angle changes slightly throughout the video clip.", + "tokens_per_second": 6.460640309211369, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 81, + "inference_time": 28.883904695510864, + "cpu_usage": 33.2, + "cpu_core_utilization": [ + 11.9, + 52.0, + 12.7, + 56.4 + ], + "request_time": 28.8981454372406 + }, + "video_part_008.mp4": { + "response": "In the footage, a man is seen at a convenience store checkout counter. He interacts with the cashier and exchanges items for money in front of him. 
The setting includes typical convenience store elements like snacks displayed on shelves and various products visible around the area.", + "tokens_per_second": 4.950112497910254, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 52, + "inference_time": 26.988234281539917, + "cpu_usage": 34.1, + "cpu_core_utilization": [ + 16.3, + 12.5, + 47.8, + 59.8 + ], + "request_time": 27.003009557724 + }, + "video_part_009.mp4": { + "response": "A man in a white shirt and black pants is at the checkout counter of a convenience store. He appears to be purchasing items, placing them into his blue shopping basket. There are two cashiers behind the counter who seem to be assisting him with his transaction. The timestamp on the video indicates that this activity is taking place during business hours on a Sunday.", + "tokens_per_second": 6.046726583056993, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 72, + "inference_time": 28.307624340057373, + "cpu_usage": 33.8, + "cpu_core_utilization": [ + 45.1, + 36.7, + 40.1, + 13.1 + ], + "request_time": 28.32164692878723 + }, + "video_part_010.mp4": { + "response": "In the convenience store, a woman in red is interacting with a cashier wearing white. The man in blue is busy packing items into his backpack while holding an item in his hand. A person dressed as a police officer captures footage of this scene on camera.", + "tokens_per_second": 5.012952424490043, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 53, + "inference_time": 26.92091703414917, + "cpu_usage": 33.9, + "cpu_core_utilization": [ + 45.7, + 15.2, + 24.5, + 49.9 + ], + "request_time": 26.9348886013031 + }, + "video_part_011.mp4": { + "response": "In this convenience store, a man in white is putting items into his backpack. A woman with red clothing approaches him and picks up an item from the counter. The cashier checks something on his phone while another person walks by holding some orange objects. 
Shortly afterward, two individuals wearing black enter the scene, one of whom holds a gray object.", + "tokens_per_second": 6.0496181050699604, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 72, + "inference_time": 28.263448238372803, + "cpu_usage": 33.7, + "cpu_core_utilization": [ + 15.3, + 25.6, + 46.3, + 47.7 + ], + "request_time": 28.2816801071167 + }, + "video_part_012.mp4": { + "response": "A man in a white shirt is standing at the counter, writing something on paper. A man wearing a blue T-shirt stands in front of him and hands over an item to the cashier. The cashier takes out money from his bag and gives it back to the customer. After receiving the change, the customers shake hands before leaving the store.", + "tokens_per_second": 6.042554615117601, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 72, + "inference_time": 28.374304056167603, + "cpu_usage": 33.3, + "cpu_core_utilization": [ + 53.9, + 22.6, + 36.0, + 20.6 + ], + "request_time": 28.389018297195435 + }, + "video_part_013.mp4": { + "response": "In the video, a person is seen handling items at the counter of a convenience store. Initially, they are seated and working with two laptops placed on the counter. Subsequently, this individual stands up holding a blue bag. They then walk away from the counter carrying the blue bag in one hand before returning to continue their work at the counter.", + "tokens_per_second": 5.897914933533588, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 69, + "inference_time": 28.07057809829712, + "cpu_usage": 33.2, + "cpu_core_utilization": [ + 52.2, + 22.0, + 22.6, + 35.7 + ], + "request_time": 28.085673093795776 + }, + "video_part_014.mp4": { + "response": "In a convenience store, an employee wearing a blue shirt is standing at the counter. The time on the camera reads 08:05 and it has been running for about one minute as of now. There are various items displayed in the background including snacks and other goods. 
Throughout this period, there aren t any noticeable changes or activities taking place except for slight movements made by the person at the counter.", + "tokens_per_second": 6.542944804531987, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 83, + "inference_time": 29.088660955429077, + "cpu_usage": 32.5, + "cpu_core_utilization": [ + 11.8, + 19.4, + 53.2, + 45.3 + ], + "request_time": 29.102965593338013 + }, + "video_part_015.mp4": { + "response": "A man in a blue shirt is working at the counter of a convenience store. He bends down, then sits on a stool while using his phone. Another person enters and hands over an item to him. The two engage in conversation as he processes the transaction.", + "tokens_per_second": 5.0694963028257245, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 54, + "inference_time": 27.06150197982788, + "cpu_usage": 33.0, + "cpu_core_utilization": [ + 18.7, + 21.7, + 41.3, + 50.1 + ], + "request_time": 27.075860023498535 + }, + "video_part_016.mp4": { + "response": "A man in a black shirt was checking out at the counter. The cashier was wearing a blue T-shirt and typing on his computer. A customer with a green bag stood next to him, watching the process. 
Another person walked by while they were waiting for their turn.", + "tokens_per_second": 5.245383520327553, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 57, + "inference_time": 27.29494571685791, + "cpu_usage": 33.6, + "cpu_core_utilization": [ + 53.0, + 47.3, + 12.8, + 21.4 + ], + "request_time": 27.308662176132202 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/qwen/20250821_013248/Clips_60s.json b/API_Transformers/output_0821/qwen/20250821_013248/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..ad1912b47cb3ef0b4f764b049fcc65be846571ef --- /dev/null +++ b/API_Transformers/output_0821/qwen/20250821_013248/Clips_60s.json @@ -0,0 +1,242 @@ +{ + "video_part_001.mp4": { + "response": "The video captures a moment in a convenience store where three individuals are engaged in a transaction. The person at the counter is handling money, while another person stands nearby, possibly waiting for their turn or observing the transaction. The third person is standing in the background, observing the interaction.\n\nThe store is well-lit, and the items on display are clearly visible. The counter is equipped with cash registers and other necessary equipment. The overall scene suggests a typical transaction in a convenience store, with the individuals involved in the process of purchasing or receiving goods.", + "tokens_per_second": 14.857358494588418, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 109, + "inference_time": 15.503445148468018, + "cpu_usage": 32.9, + "cpu_core_utilization": [ + 32.8, + 28.6, + 29.2, + 41.1 + ], + "request_time": 15.521787405014038 + }, + "video_part_002.mp4": { + "response": "The video captures a moment in a convenience store where a man is standing at the counter, seemingly engaged in a transaction with another person. The man at the counter is holding a receipt and appears to be interacting with the cashier. 
Meanwhile, another man is walking towards the counter, possibly preparing to make a purchase. The cashier is busy handling the transaction, and the other man is in motion, possibly moving towards the counter or away from it. The overall scene suggests a typical transaction in a convenience store, with the man at the counter and the other man in motion.", + "tokens_per_second": 14.913712045394723, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 113, + "inference_time": 12.929909706115723, + "cpu_usage": 42.1, + "cpu_core_utilization": [ + 24.8, + 42.8, + 25.7, + 75.0 + ], + "request_time": 12.944097757339478 + }, + "video_part_003.mp4": { + "response": "The video captures a busy convenience store scene. A cashier, wearing a black shirt, is standing behind the counter, interacting with a customer. The cashier is handling money and a receipt while the customer is standing in front of the counter. Another customer, wearing a light-colored shirt, is walking towards the counter, possibly to place an order or pick up an item. The cashier is also interacting with another customer, who is standing in front of the counter, possibly waiting for their turn or having just completed their transaction. The cashier is also seen handling money and a receipt while another customer, wearing a light-colored shirt, is walking towards the counter. The cashier is also interacting with another customer, who is standing in front of the counter, possibly waiting for their turn or having just completed their transaction. The cashier is also seen handling money and a receipt while another customer, wearing a light-colored shirt, is walking towards the counter. The cashier is also seen handling money and a receipt while another customer, wearing a light-colored shirt, is walking towards the counter. The cashier is also seen handling money and a receipt while another customer, wearing a light-colored shirt, is walking towards the counter. 
The cashier is also seen handling money and a receipt while another customer, wearing a light-colored shirt", + "tokens_per_second": 15.212866049846783, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 256, + "inference_time": 22.18146562576294, + "cpu_usage": 35.0, + "cpu_core_utilization": [ + 28.2, + 14.0, + 81.7, + 16.0 + ], + "request_time": 22.19561457633972 + }, + "video_part_004.mp4": { + "response": "The video captures a moment in a convenience store where a man is interacting with a cashier. The man is seen standing in front of the cashier, holding a card and speaking to the cashier. The cashier appears to be processing the transaction, possibly checking the card details or making a payment. The man then walks away from the cashier, and the cashier continues to process the transaction. The video does not show any other significant actions or interactions in the store.", + "tokens_per_second": 14.729804011346738, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 90, + "inference_time": 11.514073371887207, + "cpu_usage": 44.6, + "cpu_core_utilization": [ + 43.6, + 28.0, + 27.7, + 78.9 + ], + "request_time": 11.528048753738403 + }, + "video_part_005.mp4": { + "response": "The video captures a moment in a convenience store where a man in a blue shirt is interacting with another man. The man in the blue shirt is standing near the counter, while the other man is standing behind him. The man in the blue shirt is holding a green item, possibly a snack or a drink, while the other man is looking at the green item. The store is well-lit, and the background shows shelves filled with various products. 
The video does not show any other significant actions or interactions.", + "tokens_per_second": 15.087484052694805, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 102, + "inference_time": 12.099106788635254, + "cpu_usage": 41.3, + "cpu_core_utilization": [ + 37.5, + 35.4, + 68.2, + 24.2 + ], + "request_time": 12.112839221954346 + }, + "video_part_006.mp4": { + "response": "The video captures a sequence of events in a convenience store. Initially, a man in a blue shirt is seen standing at the counter, interacting with another person. A woman in a red shirt then enters the store, and she hands over a shopping bag to the man at the counter. The man then hands over the shopping bag to another person, who is also wearing a red shirt. The woman in the red shirt then hands over a shopping bag to the man at the counter. The man at the counter then hands over the shopping bag to another person, who is wearing a red shirt. Finally, the woman in the red shirt hands over a shopping bag to the man at the counter.", + "tokens_per_second": 14.952252033094313, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 138, + "inference_time": 14.600013017654419, + "cpu_usage": 40.3, + "cpu_core_utilization": [ + 21.8, + 22.2, + 22.3, + 94.7 + ], + "request_time": 14.615213632583618 + }, + "video_part_007.mp4": { + "response": "The video captures a moment in a convenience store where a cashier is interacting with a customer. The cashier is standing behind a counter, and the customer is approaching the counter with a shopping bag. The cashier appears to be assisting the customer by handling the bag and providing information or assistance. The customer seems to be receiving the bag from the cashier and then moving away with it. 
The video does not show any other significant actions or interactions between the people in the video.", + "tokens_per_second": 14.96244510342586, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 92, + "inference_time": 11.52415943145752, + "cpu_usage": 42.8, + "cpu_core_utilization": [ + 33.2, + 59.1, + 33.1, + 46.1 + ], + "request_time": 11.538487434387207 + }, + "video_part_008.mp4": { + "response": "The video captures a moment in a convenience store where a cashier is interacting with a customer. The cashier is standing behind a counter, and the customer is approaching the counter with a shopping bag. The cashier appears to be checking the customer's order and then handing the bag to the customer. The cashier is wearing a red shirt, and the customer is wearing a blue shirt. The cashier is also wearing a black apron. The store has shelves filled with various products, and the cashier is surrounded by other employees who are also wearing red shirts. The video shows the cashier's interaction with the customer, and the customer's approach to the counter.", + "tokens_per_second": 15.042997648777325, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 128, + "inference_time": 13.92055630683899, + "cpu_usage": 40.3, + "cpu_core_utilization": [ + 35.6, + 54.7, + 22.9, + 47.8 + ], + "request_time": 13.93495225906372 + }, + "video_part_009.mp4": { + "response": "The video captures a busy convenience store scene. A cashier is standing behind the counter, interacting with a customer. The cashier is handling a shopping bag and appears to be checking the contents. The customer is standing in front of the cashier, looking at the items in the bag. The cashier is also interacting with another person, who is standing next to the counter. The cashier is wearing a red shirt and is standing behind a counter with a computer monitor and keyboard. The customer is wearing a white shirt and is standing in front of the counter. 
The cashier is also wearing a red shirt and is standing behind the counter. The customer is also wearing a red shirt and is standing in front of the counter. The cashier is also wearing a red shirt and is standing behind the counter. The customer is also wearing a red shirt and is standing in front of the counter. The cashier is also wearing a red shirt and is standing behind the counter. The customer is also wearing a red shirt and is standing in front of the counter. The cashier is also wearing a red shirt and is standing behind the counter. The customer is also wearing a red shirt and is standing in front of the counter. The cashier is also wearing a red shirt and is standing behind the counter. The customer is also wearing", + "tokens_per_second": 15.022627137863786, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 256, + "inference_time": 22.475062608718872, + "cpu_usage": 35.1, + "cpu_core_utilization": [ + 14.0, + 57.8, + 15.6, + 53.2 + ], + "request_time": 22.489619493484497 + }, + "video_part_010.mp4": { + "response": "The video captures a moment in a convenience store where a man is interacting with a cashier. The man is seen placing items into a bag and then walking away. The cashier is busy processing the transaction, and there are other customers in the store. The video shows the man's actions as he moves through the store, and the cashier's actions as they process the transaction.", + "tokens_per_second": 14.923263484191663, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 74, + "inference_time": 10.318978309631348, + "cpu_usage": 45.6, + "cpu_core_utilization": [ + 35.4, + 79.2, + 32.6, + 35.0 + ], + "request_time": 10.333410263061523 + }, + "video_part_011.mp4": { + "response": "The video captures a moment in a convenience store where a man is interacting with the cashier. The man is seen standing in front of the cashier, holding a bag and a blue bag. 
He appears to be checking the items in the bag and then placing them on the counter. The cashier is busy with the transaction, and other customers are present in the store. The video does not show any other significant events or actions.", + "tokens_per_second": 15.164410207215045, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 84, + "inference_time": 10.977436542510986, + "cpu_usage": 44.3, + "cpu_core_utilization": [ + 88.7, + 28.9, + 33.2, + 26.5 + ], + "request_time": 10.991242408752441 + }, + "video_part_012.mp4": { + "response": "The video captures a moment in a convenience store where a man in a black jacket is interacting with a cashier. The man in the black jacket is standing near the counter, seemingly in the process of making a purchase. Meanwhile, another man in a blue shirt is standing near the counter, possibly waiting for his turn or observing the transaction. The cashier is busy writing down the details of the purchase, and the man in the black jacket appears to be paying for his purchase. The cashier is standing behind a counter, and the man in the blue shirt is standing near the counter, possibly waiting for his turn or observing the transaction. The cashier is wearing a white shirt, and the man in the blue shirt is wearing a blue shirt. The cashier is standing behind a counter, and the man in the blue shirt is standing near the counter. The cashier is wearing a white shirt, and the man in the blue shirt is wearing a blue shirt. The cashier is standing behind a counter, and the man in the blue shirt is standing near the counter. The cashier is wearing a white shirt, and the man in the blue shirt is wearing a blue shirt. The cashier is standing behind a counter, and the man in the blue shirt is standing near the counter. 
The cashier is wearing a", + "tokens_per_second": 15.117784722711388, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 256, + "inference_time": 22.356738567352295, + "cpu_usage": 34.6, + "cpu_core_utilization": [ + 24.1, + 60.2, + 15.1, + 38.9 + ], + "request_time": 22.371339082717896 + }, + "video_part_013.mp4": { + "response": "The video captures a moment in a convenience store where a person is seen walking towards the counter. The person is wearing a blue shirt and appears to be in motion, possibly moving towards the counter or the exit. The store is well-lit, and the counter is visible with various items on it, including a laptop and some snacks. The person's movement suggests they are either picking up an item or preparing to leave the store.", + "tokens_per_second": 14.985813897711381, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 86, + "inference_time": 11.174864530563354, + "cpu_usage": 41.0, + "cpu_core_utilization": [ + 25.1, + 70.6, + 23.8, + 44.4 + ], + "request_time": 11.189155101776123 + }, + "video_part_014.mp4": { + "response": "The video captures a moment in a convenience store where a cashier is interacting with a customer. The cashier, wearing a blue shirt, is standing behind the counter, and the customer, dressed in a white shirt, is standing in front of the cashier. The cashier is reaching out to the customer, possibly to assist or provide information. The cashier is also holding a pen, which is visible in the video. 
The customer is facing the cashier, and the interaction appears to be a transaction or service being provided.", + "tokens_per_second": 15.019531662577604, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 101, + "inference_time": 12.208862781524658, + "cpu_usage": 40.9, + "cpu_core_utilization": [ + 58.5, + 47.3, + 33.7, + 24.0 + ], + "request_time": 12.223294973373413 + }, + "video_part_015.mp4": { + "response": "The video captures a moment in a convenience store where a cashier is interacting with a customer. The cashier is seen standing behind the counter, handling money and a receipt while the customer walks up to the counter. The cashier then hands the customer a receipt and a package, indicating the completion of the transaction. The customer then walks away, and the cashier continues to handle the cash register.", + "tokens_per_second": 14.90843712868291, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 76, + "inference_time": 10.508623838424683, + "cpu_usage": 42.4, + "cpu_core_utilization": [ + 33.2, + 26.3, + 25.8, + 84.0 + ], + "request_time": 10.522614240646362 + }, + "video_part_016.mp4": { + "response": "The video captures a moment in a convenience store where a man is interacting with a cashier. The man is seen standing near the counter, speaking to the cashier. Meanwhile, another man is seen walking towards the counter, possibly preparing to make a purchase. The cashier appears to be handling money and possibly making change. The man walking towards the counter seems to be in the process of making a purchase. 
The overall scene suggests a typical transaction in a convenience store.", + "tokens_per_second": 15.01252738973886, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 91, + "inference_time": 11.522382736206055, + "cpu_usage": 42.0, + "cpu_core_utilization": [ + 24.5, + 25.9, + 25.9, + 91.7 + ], + "request_time": 11.537637710571289 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/qwen2/20250821_003022/Clips_60s.json b/API_Transformers/output_0821/qwen2/20250821_003022/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..d6b74d38fd99bb66bc9149579046c454ead60aa4 --- /dev/null +++ b/API_Transformers/output_0821/qwen2/20250821_003022/Clips_60s.json @@ -0,0 +1,137 @@ +{ + "sample_part_001.mp4": { + "response": "The video captures a busy convenience store scene with several people engaged in various activities. A woman is seen standing at the counter, possibly interacting with a customer or making a purchase. Another person is standing near the counter, possibly waiting for their turn or observing the store. \n\nIn the background, there are several other customers, some standing and others sitting, engaged in conversations or waiting for their turn. The store is well-stocked with various products, including bottles and other items, displayed on shelves and tables. \n\nThe store's interior is well-lit, with a blue and white color scheme dominating the scene. The atmosphere appears to be busy and bustling, with people moving around and interacting with each other.", + "tokens_per_second": 15.126569543378265, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 141, + "inference_time": 19.589024782180786, + "cpu_usage": 28.8, + "cpu_core_utilization": [ + 28.0, + 25.1, + 38.9, + 23.2 + ], + "request_time": 19.604559898376465 + }, + "sample_part_002.mp4": { + "response": "The video captures a busy convenience store scene with several people engaged in various activities. 
The primary focus is on the cashier, who is handling a customer's order. The cashier is seen interacting with the customer, possibly checking the order or providing assistance. \n\nIn the background, there are other customers and staff members, some of whom are engaged in different tasks such as packing items or interacting with other customers. The store is well-lit, and the atmosphere appears to be busy and bustling.\n\nThe video does not show any significant movement or action that would indicate a specific event or moment. The primary focus is on the cashier and the customer's interaction, with the background showing other customers and staff members.", + "tokens_per_second": 15.30312036179949, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 139, + "inference_time": 15.901453733444214, + "cpu_usage": 45.5, + "cpu_core_utilization": [ + 81.9, + 29.9, + 42.4, + 27.9 + ], + "request_time": 15.915685415267944 + }, + "sample_part_003.mp4": { + "response": "The video captures a busy convenience store scene with several people engaged in various activities. A person is seen standing near the counter, possibly waiting for service or observing the store. Another person is walking past the counter, possibly moving between different areas of the store. \n\nIn the background, there are several other people, some standing and others walking, indicating a bustling atmosphere. The store is well-stocked with various items, including shelves filled with products and a display of items on the counter. \n\nThe store's interior is well-lit, with bright lighting illuminating the shelves and products. 
The overall scene suggests a typical day at a convenience store with customers engaging in typical store activities.", + "tokens_per_second": 15.081962195610863, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 136, + "inference_time": 15.824401617050171, + "cpu_usage": 46.3, + "cpu_core_utilization": [ + 66.8, + 30.9, + 58.6, + 29.0 + ], + "request_time": 15.83922028541565 + }, + "sample_part_004.mp4": { + "response": "The video captures a busy convenience store scene with several people engaged in various activities. A person is seen standing near the counter, possibly waiting for service or observing the store. Another person is seen walking past the counter, possibly moving between different areas of the store. \n\nIn the background, there are several other people, some standing and others walking, indicating a bustling atmosphere. The store is well-stocked with various items, including shelves filled with products and a display of toys. \n\nThe store's interior is well-lit, with a blue and white color scheme dominating the scene. The lighting suggests that the store is open, and the people's actions indicate active shopping or waiting for service.", + "tokens_per_second": 15.1012932923201, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 138, + "inference_time": 15.923634767532349, + "cpu_usage": 44.7, + "cpu_core_utilization": [ + 29.3, + 28.1, + 27.0, + 94.3 + ], + "request_time": 15.937758445739746 + }, + "sample_part_005.mp4": { + "response": "The video captures a busy convenience store with a long checkout line. A person is seen standing at the checkout, possibly waiting for their order to be processed. The checkout area is crowded with customers, and the store appears to be busy with transactions. The store's interior is well-lit, and the checkout area is clearly visible. 
The video does not show any specific actions or interactions of the people in the video.", + "tokens_per_second": 14.916020163544944, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 83, + "inference_time": 12.322606801986694, + "cpu_usage": 50.3, + "cpu_core_utilization": [ + 37.0, + 57.7, + 34.8, + 71.6 + ], + "request_time": 12.337071657180786 + }, + "sample_part_006.mp4": { + "response": "The video captures a busy convenience store scene with several people engaged in various activities. The primary focus is on the cashier, who is interacting with a customer. The cashier is handling money and appears to be assisting the customer in making a purchase. \n\nIn the background, there are other customers and staff members, some of whom are engaged in conversation and others who are busy with their tasks. The store is well-lit, and the atmosphere appears to be bustling with activity. \n\nThe store's interior features shelves filled with various products, and the cashier is surrounded by a variety of items, including food and beverages. The overall scene suggests a typical day at a convenience store with customers making purchases and staff members attending to their needs.", + "tokens_per_second": 15.137056054618776, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 145, + "inference_time": 16.423986196517944, + "cpu_usage": 44.8, + "cpu_core_utilization": [ + 31.9, + 78.8, + 27.5, + 41.0 + ], + "request_time": 16.439354419708252 + }, + "sample_part_007.mp4": { + "response": "The video captures a busy convenience store scene with several people engaged in various activities. A woman is seen standing near the counter, possibly interacting with the cashier. Other customers are present, some standing and others sitting, engaged in conversations or waiting for their turn. The store is well-stocked with various items, including a large number of bottles and cans. 
The atmosphere appears to be busy and bustling, with people moving around and interacting with each other.", + "tokens_per_second": 15.146565978743613, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 89, + "inference_time": 12.685372352600098, + "cpu_usage": 50.4, + "cpu_core_utilization": [ + 35.0, + 35.3, + 94.0, + 37.2 + ], + "request_time": 12.699281454086304 + }, + "sample_part_008.mp4": { + "response": "In the video, a person is seen walking through a convenience store. They are seen walking past a display of items, including a large number of hats. The person appears to be looking at the display and then walks past it.", + "tokens_per_second": 15.105367104028009, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 46, + "inference_time": 9.880136013031006, + "cpu_usage": 56.6, + "cpu_core_utilization": [ + 44.1, + 58.8, + 44.6, + 78.6 + ], + "request_time": 9.894367218017578 + }, + "sample_part_009.mp4": { + "response": "In the video, a person is seen walking through a convenience store. They are seen walking through the store, looking at items on display. 
The person appears to be browsing through the items on the shelves.", + "tokens_per_second": 15.121278187118696, + "peak_gpu_memory_mb": 4514.375, + "num_generated_tokens": 41, + "inference_time": 9.507444143295288, + "cpu_usage": 57.4, + "cpu_core_utilization": [ + 80.4, + 45.2, + 47.0, + 57.0 + ], + "request_time": 9.521273612976074 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/qwen2_5/20250821_004434/Clips_60s.json b/API_Transformers/output_0821/qwen2_5/20250821_004434/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..34f0a81481aa52371704c049c7823197d565a259 --- /dev/null +++ b/API_Transformers/output_0821/qwen2_5/20250821_004434/Clips_60s.json @@ -0,0 +1,62 @@ +{ + "sample_part_001.mp4": { + "response": "A person wearing blue gloves picks up a bottle of water from the floor and places it back on the shelf.", + "tokens_per_second": 11.896679103804114, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 22, + "inference_time": 12.730306386947632, + "cpu_usage": 20.1, + "cpu_core_utilization": [ + 17.7, + 19.0, + 21.6, + 22.0 + ], + "request_time": 12.745316982269287 + }, + "sample_part_002.mp4": { + "response": "A person wearing blue gloves picks up a bottle from the counter, then places it back down. The person then picks up another bottle and places it on the counter.", + "tokens_per_second": 12.02869428489415, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 33, + "inference_time": 10.584169864654541, + "cpu_usage": 55.1, + "cpu_core_utilization": [ + 42.1, + 41.3, + 93.8, + 43.2 + ], + "request_time": 10.599837064743042 + }, + "sample_part_003.mp4": { + "response": "A man wearing a blue shirt and gloves picks up a bottle of water from the shelf. 
He then places it back on the shelf.", + "tokens_per_second": 11.82593643667435, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 27, + "inference_time": 10.161383867263794, + "cpu_usage": 56.2, + "cpu_core_utilization": [ + 90.9, + 44.3, + 46.7, + 42.8 + ], + "request_time": 10.176398277282715 + }, + "sample_part_004.mp4": { + "response": "A man in a blue shirt walks into a convenience store. He stops at a display of snacks and picks up a bag of chips. He then walks to the checkout counter and places the bag of chips in his shopping cart.", + "tokens_per_second": 11.785621023429538, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 45, + "inference_time": 11.899845838546753, + "cpu_usage": 53.0, + "cpu_core_utilization": [ + 38.8, + 90.1, + 41.1, + 42.3 + ], + "request_time": 11.914695501327515 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/qwen2_5/20250821_004550/Clips_60s.json b/API_Transformers/output_0821/qwen2_5/20250821_004550/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..3be010ef8b81c73166a284d3981154e2a4d5aa79 --- /dev/null +++ b/API_Transformers/output_0821/qwen2_5/20250821_004550/Clips_60s.json @@ -0,0 +1,77 @@ +{ + "sample_part_001.mp4": { + "response": "The video depicts a checkout area in a store, likely a supermarket or grocery store. The scene is captured from an overhead perspective, providing a clear view of the checkout counter and the items being handled.\n\nIn this particular segment, a customer is interacting with a cashier who is wearing gloves. The cashier is handling various items, including what appears to be a box of cereal or a similar packaged food item. The customer is also wearing gloves, indicating that they might be handling food items as well. 
Both individuals are focused on their tasks, ensuring that the items are correctly priced and scanned.\n\nThe checkout area is well-lit, and the shelves behind the counter are stocked with numerous products, including what looks like bottled beverages and other packaged goods. The cashier's actions are methodical, suggesting a routine check-out process. The customer's movements are deliberate, possibly checking the prices or preparing to pay for their items.\n\nOverall, the video captures a typical moment in a retail environment where both the customer and the cashier are engaged in their respective tasks, ensuring a smooth checkout experience.", + "tokens_per_second": 11.854063880552362, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 213, + "inference_time": 28.37625813484192, + "cpu_usage": 43.3, + "cpu_core_utilization": [ + 34.4, + 74.4, + 32.5, + 31.9 + ], + "request_time": 28.390744924545288 + }, + "sample_part_002.mp4": { + "response": "The video depicts a checkout area in a store, likely a supermarket or convenience store. The camera angle is from above, providing an overhead view of the scene.\n\nIn the foreground, there is a checkout counter with various items displayed on it. These items include packaged goods such as snacks, drinks, and other small groceries. The counter is well-stocked, indicating that the store is busy with customers.\n\nA person wearing blue gloves is seen interacting with the items on the counter. They appear to be either scanning items for purchase or organizing them. The person's movements are deliberate and focused, suggesting they are engaged in their work.\n\nIn the background, another person can be seen, possibly a customer or another employee. This individual is also wearing blue gloves and seems to be handling some items on the counter. 
The interaction between these individuals suggests a collaborative effort in managing the checkout area.\n\nThe overall environment appears organized and efficient, typical of a well-maintained retail setting. The lighting is bright, ensuring that all items are clearly visible to both the staff and the customers. The presence of multiple items on display indicates a variety of products available for purchase, catering to different customer needs.\n\nOverall, the video captures a typical day at a checkout area, highlighting the routine activities of staff and the", + "tokens_per_second": 11.997021386458192, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 256, + "inference_time": 29.07604718208313, + "cpu_usage": 37.0, + "cpu_core_utilization": [ + 33.9, + 16.1, + 80.3, + 17.7 + ], + "request_time": 29.090667486190796 + }, + "sample_part_003.mp4": { + "response": "The video depicts a checkout area within a store, likely a supermarket or convenience store. The camera angle is from above, providing a bird's-eye view of the scene.\n\nIn the foreground, there is a checkout counter with various items displayed on it. These items include packaged goods such as snacks, beverages, and possibly other small groceries. The counter appears to be well-stocked, indicating that the store is operational and stocked with products for customers to purchase.\n\nA person, presumably a customer, is standing at the checkout counter. They are wearing a blue shirt and appear to be interacting with the cashier. The customer is holding a plastic bag, which they have placed on the counter. This suggests that they might be in the process of purchasing items and preparing to pay for them.\n\nThe cashier is also visible, though their face is not clearly discernible. The cashier seems to be attending to the customer, likely scanning items and processing payments. 
The interaction between the customer and the cashier indicates a typical checkout scenario where the customer is making a purchase.\n\nThe background shows shelves filled with more products, reinforcing the idea that this is a well-stocked retail environment. The lighting is bright, typical of a commercial setting, ensuring that all items are clearly visible to both the customer and the cashier.\n\n", + "tokens_per_second": 12.037390307990146, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 256, + "inference_time": 29.021669626235962, + "cpu_usage": 37.2, + "cpu_core_utilization": [ + 48.4, + 16.9, + 65.1, + 18.1 + ], + "request_time": 29.03581213951111 + }, + "sample_part_004.mp4": { + "response": "The video depicts a scene inside a store, likely a supermarket or a similar retail environment. The camera is positioned at eye level, capturing a wide view of the area.\n\nIn the foreground, there is a display counter with various items for sale. The counter appears to be stocked with products, possibly including food items like fruits and vegetables, as indicated by the visible packaging and arrangement of items. There are also some promotional materials or signs placed on the counter, which might be used to highlight specific products or promotions.\n\nA person, presumably a customer, is seen interacting with the display counter. This individual is wearing a blue shirt and seems to be either picking up an item from the counter or examining something closely. The person's actions suggest they are engaged in shopping or browsing the available products.\n\nThe background shows more shelves and displays filled with various goods. The shelves are well-stocked, indicating that the store has a variety of products available. 
The lighting in the store is bright, typical of retail environments, ensuring that the products are clearly visible to customers.\n\nOverall, the video captures a typical moment in a retail setting where a customer is interacting with the merchandise displayed on the counter. The presence of promotional materials and the well-organized shelves contribute to the typical atmosphere of a modern retail store", + "tokens_per_second": 12.027123899562989, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 256, + "inference_time": 29.077280521392822, + "cpu_usage": 36.9, + "cpu_core_utilization": [ + 74.2, + 17.8, + 15.5, + 40.0 + ], + "request_time": 29.091379404067993 + }, + "sample_part_005.mp4": { + "response": "The video depicts a retail environment, likely a store or a supermarket, where a person is interacting with an item on display. The setting appears to be a well-lit aisle with various products arranged neatly on shelves. The individual is seen reaching out towards the items, possibly examining them or preparing to make a purchase.\n\nThe person's actions suggest they might be considering different products, perhaps comparing prices or features. The surrounding area includes other products and possibly promotional materials, indicating that this is a commercial space designed for shopping. The lighting is bright, ensuring that the products are clearly visible to customers.\n\nThere are no visible staff members in the frame, so the interaction seems to be between the customer and the displayed items. 
The overall atmosphere of the video suggests a typical day at a retail location where customers are engaged in their shopping activities.", + "tokens_per_second": 11.998806395924422, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 165, + "inference_time": 21.516730070114136, + "cpu_usage": 40.1, + "cpu_core_utilization": [ + 93.9, + 22.7, + 21.5, + 22.1 + ], + "request_time": 21.531025886535645 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/qwen2_5/20250821_005053/Clips_60s.json b/API_Transformers/output_0821/qwen2_5/20250821_005053/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..8f146f55709f4841df64649d970cea4717312dcb --- /dev/null +++ b/API_Transformers/output_0821/qwen2_5/20250821_005053/Clips_60s.json @@ -0,0 +1,77 @@ +{ + "sample_part_001.mp4": { + "response": "A person wearing blue gloves picks up a bottle of water from the floor and places it back on the shelf.", + "tokens_per_second": 11.82581726573877, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 22, + "inference_time": 11.477622509002686, + "cpu_usage": 19.9, + "cpu_core_utilization": [ + 17.8, + 19.9, + 21.8, + 20.2 + ], + "request_time": 11.491647720336914 + }, + "sample_part_002.mp4": { + "response": "A person wearing blue gloves picks up a bottle from the counter, then places it back down. The person then picks up another bottle and places it on the counter.", + "tokens_per_second": 11.74072921403584, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 33, + "inference_time": 10.658955097198486, + "cpu_usage": 56.2, + "cpu_core_utilization": [ + 43.8, + 43.4, + 43.8, + 93.4 + ], + "request_time": 10.674673557281494 + }, + "sample_part_003.mp4": { + "response": "A man wearing a blue shirt and gloves is standing at a checkout counter. He is holding a plastic bag in his left hand and a black item in his right hand. 
He places the black item into the plastic bag.", + "tokens_per_second": 11.73304837389127, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 44, + "inference_time": 11.553057432174683, + "cpu_usage": 52.1, + "cpu_core_utilization": [ + 38.3, + 93.8, + 38.3, + 38.0 + ], + "request_time": 11.567878246307373 + }, + "sample_part_004.mp4": { + "response": "A man in a blue shirt walks into a convenience store. He stops at a display of snacks and picks up a bag of chips. He then walks to the checkout counter and places the bag of chips in his shopping cart.", + "tokens_per_second": 11.929480284506932, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 45, + "inference_time": 11.5685555934906, + "cpu_usage": 52.3, + "cpu_core_utilization": [ + 89.2, + 38.6, + 42.9, + 38.3 + ], + "request_time": 11.582696676254272 + }, + "sample_part_005.mp4": { + "response": "A man walks into a convenience store. He picks up a bottle of water from the shelf and places it in his shopping cart. He then walks to the checkout counter and pays for his items.", + "tokens_per_second": 11.89941941740383, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 39, + "inference_time": 11.116148233413696, + "cpu_usage": 53.2, + "cpu_core_utilization": [ + 37.8, + 40.6, + 93.6, + 40.6 + ], + "request_time": 11.130709886550903 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/qwen2_5/20250821_005221/Clips_60s.json b/API_Transformers/output_0821/qwen2_5/20250821_005221/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..547e1c3778f404d19706a20f1a4da824709cc33e --- /dev/null +++ b/API_Transformers/output_0821/qwen2_5/20250821_005221/Clips_60s.json @@ -0,0 +1,137 @@ +{ + "sample_part_001.mp4": { + "response": "A person wearing blue gloves is seen placing a box of snacks on a conveyor belt. 
The person then picks up another box of snacks from the floor and places it on the conveyor belt.", + "tokens_per_second": 11.87194334609885, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 37, + "inference_time": 10.964619874954224, + "cpu_usage": 20.3, + "cpu_core_utilization": [ + 15.5, + 15.3, + 33.8, + 16.7 + ], + "request_time": 10.978468179702759 + }, + "sample_part_002.mp4": { + "response": "A person wearing blue gloves is seen placing items into a shopping cart. The person then picks up a bottle from the floor and places it into the cart. The person then picks up another item from the floor and places it into the cart.", + "tokens_per_second": 11.83286096302887, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 48, + "inference_time": 11.95991587638855, + "cpu_usage": 52.1, + "cpu_core_utilization": [ + 39.5, + 38.2, + 37.0, + 93.5 + ], + "request_time": 11.974170684814453 + }, + "sample_part_003.mp4": { + "response": "A person wearing a blue glove picks up a package from the floor, then places it back down. Another person wearing a blue glove picks up a package from the floor, then places it back down.", + "tokens_per_second": 11.760994323642526, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 40, + "inference_time": 11.289813041687012, + "cpu_usage": 53.3, + "cpu_core_utilization": [ + 40.3, + 75.1, + 39.1, + 58.8 + ], + "request_time": 11.306392669677734 + }, + "sample_part_004.mp4": { + "response": "A man in a blue shirt walks through a convenience store aisle, passing by various products on the shelves. He stops at a display of snacks and picks up a bag. 
He then continues down the aisle, passing by more products.", + "tokens_per_second": 11.888364051217732, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 46, + "inference_time": 11.696603775024414, + "cpu_usage": 51.6, + "cpu_core_utilization": [ + 59.1, + 67.7, + 41.2, + 38.5 + ], + "request_time": 11.71169400215149 + }, + "sample_part_005.mp4": { + "response": "A person is seen walking through a convenience store aisle, picking up a package from the shelf. They then place it back on the shelf before continuing down the aisle.", + "tokens_per_second": 11.790592742669173, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 33, + "inference_time": 10.743636846542358, + "cpu_usage": 55.3, + "cpu_core_utilization": [ + 53.6, + 43.0, + 81.8, + 42.7 + ], + "request_time": 10.760183811187744 + }, + "sample_part_006.mp4": { + "response": "A person is seen reaching for a product on a shelf. They then pick up the item and place it into their shopping cart.", + "tokens_per_second": 11.854764118772119, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 26, + "inference_time": 10.05913519859314, + "cpu_usage": 55.9, + "cpu_core_utilization": [ + 48.9, + 44.4, + 87.7, + 42.4 + ], + "request_time": 10.072754621505737 + }, + "sample_part_007.mp4": { + "response": "A woman is standing at the checkout counter, holding a yellow receipt. She places the receipt on the counter and then picks up a blue bag from the floor. She then places the blue bag back down and continues to stand at the checkout counter.", + "tokens_per_second": 11.806017274209756, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 49, + "inference_time": 11.997853994369507, + "cpu_usage": 51.4, + "cpu_core_utilization": [ + 49.4, + 37.7, + 81.8, + 36.8 + ], + "request_time": 12.011772871017456 + }, + "sample_part_008.mp4": { + "response": "A man picks up a bag of chips from the shelf, then places it back down. 
He then picks up another bag of chips and walks away with it.", + "tokens_per_second": 11.878890265234213, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 32, + "inference_time": 10.614681482315063, + "cpu_usage": 55.0, + "cpu_core_utilization": [ + 90.1, + 42.2, + 45.8, + 41.8 + ], + "request_time": 10.62955355644226 + }, + "sample_part_009.mp4": { + "response": "A person is seen walking through a convenience store aisle, picking up items from the shelves. They then place the items back on the shelves before continuing to walk down the aisle.", + "tokens_per_second": 11.82995179235076, + "peak_gpu_memory_mb": 5348.375, + "num_generated_tokens": 35, + "inference_time": 10.799692392349243, + "cpu_usage": 53.8, + "cpu_core_utilization": [ + 49.9, + 41.3, + 78.9, + 45.0 + ], + "request_time": 10.814430475234985 + } +} \ No newline at end of file diff --git a/API_Transformers/output_0821/qwen2_5/20250821_025409/Clips_60s.json b/API_Transformers/output_0821/qwen2_5/20250821_025409/Clips_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..c5c7adb3b75036b3f3fcdcecacefac74ce16e194 --- /dev/null +++ b/API_Transformers/output_0821/qwen2_5/20250821_025409/Clips_60s.json @@ -0,0 +1,242 @@ +{ + "video_part_001.mp4": { + "response": "A man is standing at the counter of a convenience store, interacting with a cashier. The cashier is using a computer to process the transaction. The man is holding a bag of snacks and appears to be paying for them. The cashier is typing on the computer and looking at the man. The man then walks away from the counter.", + "tokens_per_second": 11.859020159952623, + "peak_gpu_memory_mb": 5350.375, + "num_generated_tokens": 66, + "inference_time": 14.119965553283691, + "cpu_usage": 2.0, + "cpu_core_utilization": [ + 2.0, + 2.0, + 1.9, + 1.9 + ], + "request_time": 14.134722232818604 + }, + "video_part_002.mp4": { + "response": "A man is standing at the counter of a convenience store, looking at his phone. 
He then turns to the cashier and hands her a card. The cashier takes the card and places it into a machine. The man then walks away from the counter.", + "tokens_per_second": 12.033912631174916, + "peak_gpu_memory_mb": 5350.375, + "num_generated_tokens": 50, + "inference_time": 10.49275827407837, + "cpu_usage": 44.1, + "cpu_core_utilization": [ + 80.0, + 27.5, + 40.1, + 29.0 + ], + "request_time": 10.508188486099243 + }, + "video_part_003.mp4": { + "response": "A man is standing at the counter of a convenience store, looking at his phone. He then turns to the cashier and hands her a bag of groceries. The cashier scans the items and takes payment from the man. The man then walks away with his bag of groceries.", + "tokens_per_second": 11.980873759204092, + "peak_gpu_memory_mb": 5350.375, + "num_generated_tokens": 54, + "inference_time": 10.841167688369751, + "cpu_usage": 43.7, + "cpu_core_utilization": [ + 49.1, + 32.4, + 65.3, + 27.8 + ], + "request_time": 10.855761528015137 + }, + "video_part_004.mp4": { + "response": "A customer is shopping at a convenience store. The cashier is scanning items and ringing them up. The customer is looking at the items on the counter and talking to the cashier. The cashier is using a computer to scan the items and ring them up. The customer is also looking at the items on the counter and talking to the cashier. The cashier is also looking at the items on the counter and talking to the customer. The customer is also looking at the items on the counter and talking to the cashier. The cashier is also looking at the items on the counter and talking to the customer. The customer is also looking at the items on the counter and talking to the cashier. The cashier is also looking at the items on the counter and talking to the customer. The customer is also looking at the items on the counter and talking to the cashier. The cashier is also looking at the items on the counter and talking to the customer. 
The customer is also looking at the items on the counter and talking to the cashier. The cashier is also looking at the items on the counter and talking to the customer. The customer is also looking at the items on the counter and talking to the cashier. The cashier is also looking at the items on the counter and talking to the customer. The customer", + "tokens_per_second": 11.894932301505968, + "peak_gpu_memory_mb": 5350.375, + "num_generated_tokens": 256, + "inference_time": 27.97708511352539, + "cpu_usage": 33.8, + "cpu_core_utilization": [ + 13.9, + 45.9, + 13.3, + 61.9 + ], + "request_time": 27.991957426071167 + }, + "video_part_005.mp4": { + "response": "A man in a blue shirt is at the checkout counter, scanning items and placing them on the conveyor belt. He then picks up a bag of chips from the counter and places it on the conveyor belt. Another man in a blue shirt is standing behind him, watching the process. The man at the counter continues to scan items and place them on the conveyor belt.", + "tokens_per_second": 11.99842860278374, + "peak_gpu_memory_mb": 5350.375, + "num_generated_tokens": 73, + "inference_time": 12.41398811340332, + "cpu_usage": 40.7, + "cpu_core_utilization": [ + 34.0, + 38.6, + 64.3, + 25.9 + ], + "request_time": 12.428631782531738 + }, + "video_part_006.mp4": { + "response": "A man in a blue shirt is standing behind a counter at a convenience store. He is holding a phone in his hand. A woman in a red vest walks up to the counter and hands him a plastic bag. She then walks away. The man puts something into the plastic bag. Another man in a red shirt walks up to the counter and hands the man in the blue shirt another plastic bag. The man in the blue shirt puts something into the plastic bag. 
The man in the red shirt walks away.", + "tokens_per_second": 11.862422969421846, + "peak_gpu_memory_mb": 5350.375, + "num_generated_tokens": 102, + "inference_time": 15.035767078399658, + "cpu_usage": 39.3, + "cpu_core_utilization": [ + 21.5, + 43.6, + 21.6, + 70.6 + ], + "request_time": 15.050400972366333 + }, + "video_part_007.mp4": { + "response": "A customer is shopping at a convenience store. The cashier is scanning items and ringing them up. The customer is looking at the items they are purchasing. The cashier is talking to the customer. The customer is putting items into their basket. The cashier is taking money from the customer. The customer is walking out of the store.", + "tokens_per_second": 12.007495276914103, + "peak_gpu_memory_mb": 5350.375, + "num_generated_tokens": 65, + "inference_time": 11.82714319229126, + "cpu_usage": 42.8, + "cpu_core_utilization": [ + 60.5, + 34.4, + 49.7, + 26.5 + ], + "request_time": 11.842108011245728 + }, + "video_part_008.mp4": { + "response": "A cashier at a convenience store is scanning items for a customer. The customer is holding a blue plastic bag and appears to be adding items to it. Another person is standing behind the counter, possibly assisting with the transaction or managing inventory. The cashier is focused on scanning the items, while the customer is engaged in conversation with the cashier.", + "tokens_per_second": 11.871294681994929, + "peak_gpu_memory_mb": 5350.375, + "num_generated_tokens": 67, + "inference_time": 12.125900506973267, + "cpu_usage": 43.3, + "cpu_core_utilization": [ + 35.9, + 32.5, + 78.1, + 26.8 + ], + "request_time": 12.140157222747803 + }, + "video_part_009.mp4": { + "response": "A customer is shopping at a convenience store. The cashier is scanning items and ringing them up. The customer is pushing a shopping cart and picking up items from the shelves. 
The customer is also using a laptop to look up prices or make a purchase.", + "tokens_per_second": 11.63501242448262, + "peak_gpu_memory_mb": 5350.375, + "num_generated_tokens": 50, + "inference_time": 10.730757713317871, + "cpu_usage": 46.3, + "cpu_core_utilization": [ + 38.3, + 58.2, + 31.7, + 56.7 + ], + "request_time": 10.745136737823486 + }, + "video_part_010.mp4": { + "response": "A cashier at a convenience store is scanning items for a customer. The customer is holding a bag and appears to be paying for their purchase. The cashier is using a cash register and typing on a keyboard. The customer is standing behind the counter, looking at the cashier. The cashier is wearing a white shirt and black pants. The customer is wearing a red shirt and blue jeans. There are several other people in the store, including a man in a black shirt and a woman in a red shirt. The store has a variety of products on display, including snacks and drinks.", + "tokens_per_second": 11.874488678953208, + "peak_gpu_memory_mb": 5350.375, + "num_generated_tokens": 115, + "inference_time": 16.08448100090027, + "cpu_usage": 37.8, + "cpu_core_utilization": [ + 19.6, + 68.7, + 18.4, + 44.3 + ], + "request_time": 16.099010229110718 + }, + "video_part_011.mp4": { + "response": "A man in a white shirt is at the counter of a convenience store, interacting with a cashier. The man is seen placing items into a blue bag. Another man in a red shirt is also present at the counter, looking at his phone. 
A third man in a black shirt is standing near the counter, observing the interaction between the man in the white shirt and the cashier.", + "tokens_per_second": 11.829041430743297, + "peak_gpu_memory_mb": 5350.375, + "num_generated_tokens": 76, + "inference_time": 12.926791906356812, + "cpu_usage": 42.0, + "cpu_core_utilization": [ + 28.8, + 42.5, + 25.9, + 70.9 + ], + "request_time": 12.941241264343262 + }, + "video_part_012.mp4": { + "response": "A man in a white shirt is standing behind a counter at a convenience store. He is using a cash register to scan items. A man in a black shirt walks up to the counter and hands the man in the white shirt a bag of chips. The man in the white shirt takes the bag of chips from the man in the black shirt and scans it with the cash register. The man in the black shirt then walks away.", + "tokens_per_second": 11.928726359703456, + "peak_gpu_memory_mb": 5350.375, + "num_generated_tokens": 86, + "inference_time": 13.74686050415039, + "cpu_usage": 39.8, + "cpu_core_utilization": [ + 31.6, + 62.3, + 41.3, + 23.8 + ], + "request_time": 13.761478900909424 + }, + "video_part_013.mp4": { + "response": "A person wearing a yellow shirt and blue jeans walks into the convenience store. They approach the counter and place an item on it. The person then walks out of the store.", + "tokens_per_second": 12.014726651428436, + "peak_gpu_memory_mb": 5350.375, + "num_generated_tokens": 35, + "inference_time": 9.374907970428467, + "cpu_usage": 43.8, + "cpu_core_utilization": [ + 29.4, + 29.6, + 88.1, + 27.9 + ], + "request_time": 9.388736724853516 + }, + "video_part_014.mp4": { + "response": "A person wearing a blue cap is standing at the counter of a convenience store. They are looking at the items on the counter, which include a laptop, a cash register, and a display of snacks. 
The person then picks up a bottle of water from the counter and places it in their shopping bag.", + "tokens_per_second": 11.861972979079045, + "peak_gpu_memory_mb": 5350.375, + "num_generated_tokens": 61, + "inference_time": 11.606827020645142, + "cpu_usage": 41.6, + "cpu_core_utilization": [ + 42.9, + 26.4, + 27.0, + 69.9 + ], + "request_time": 11.620692253112793 + }, + "video_part_015.mp4": { + "response": "A man wearing a blue shirt is standing at the counter of a convenience store. He is looking at his phone. A man wearing a black shirt walks up to the counter and hands something to the man in the blue shirt. The man in the blue shirt takes the item from the man in the black shirt. The man in the blue shirt then walks away from the counter.", + "tokens_per_second": 12.034885208983422, + "peak_gpu_memory_mb": 5350.375, + "num_generated_tokens": 75, + "inference_time": 12.683927297592163, + "cpu_usage": 39.5, + "cpu_core_utilization": [ + 59.8, + 22.7, + 53.7, + 22.1 + ], + "request_time": 12.697991609573364 + }, + "video_part_016.mp4": { + "response": "A customer is shopping at a convenience store. The cashier is scanning items and ringing them up. The customer is selecting items from the shelves and placing them into their bag. The customer is also using a credit card to pay for their items. The cashier is typing on the cash register and looking at the computer screen. The customer is walking around the store, looking at different items. The cashier is standing behind the counter, looking at the computer screen. The customer is putting items into their bag. The cashier is ringing up the items. The customer is looking at the items on the shelves. The cashier is looking at the computer screen. The customer is walking around the store. The cashier is standing behind the counter. The customer is putting items into their bag. The cashier is ringing up the items. The customer is looking at the items on the shelves. The cashier is looking at the computer screen. 
The customer is walking around the store. The cashier is standing behind the counter. The customer is putting items into their bag. The cashier is ringing up the items. The customer is looking at the items on the shelves. The cashier is looking at the computer screen. The customer is walking around the store. The cashier is standing behind the counter. The customer is putting items into their", + "tokens_per_second": 12.052444962168167, + "peak_gpu_memory_mb": 5350.375, + "num_generated_tokens": 256, + "inference_time": 27.71171498298645, + "cpu_usage": 33.2, + "cpu_core_utilization": [ + 31.4, + 17.1, + 70.7, + 13.4 + ], + "request_time": 27.72644567489624 + } +} \ No newline at end of file diff --git a/API_Transformers/outputs/Qwen2-VL-2B-Instruct-AWQ/20250818_222617.json b/API_Transformers/outputs/Qwen2-VL-2B-Instruct-AWQ/20250818_222617.json new file mode 100644 index 0000000000000000000000000000000000000000..e848a4373f1a414917c727c188fa3f2a6440da2b --- /dev/null +++ b/API_Transformers/outputs/Qwen2-VL-2B-Instruct-AWQ/20250818_222617.json @@ -0,0 +1 @@ +{"messi_part_001.mp4": {"response": "The video captures a soccer match between two teams, with the audience cheering and the players celebrating. The video features a player wearing a red and blue uniform, who is seen running on the field and celebrating with the crowd. The video also shows a player wearing a blue and white uniform, who is seen running on the field and celebrating with the crowd. 
The video ends with a shot of the player wearing a red and blue uniform, who is seen running on the field and celebrating with the crowd.", "tokens_per_second": 8.544413217338054, "peak_gpu_memory_mb": 4498.375}} \ No newline at end of file diff --git a/API_Transformers/outputs/Qwen2-VL-2B-Instruct-AWQ/20250818_223141.json b/API_Transformers/outputs/Qwen2-VL-2B-Instruct-AWQ/20250818_223141.json new file mode 100644 index 0000000000000000000000000000000000000000..b586883c721993af5cd670edb8e1161ee8bdaa65 --- /dev/null +++ b/API_Transformers/outputs/Qwen2-VL-2B-Instruct-AWQ/20250818_223141.json @@ -0,0 +1,15 @@ +{ + "messi_part_001.mp4": { + "response": "The video captures a soccer match between two teams, with the audience cheering and the players celebrating. The video features a player wearing a red and blue uniform, who is seen running on the field and celebrating with the crowd. The video also shows a player wearing a blue and white uniform, who is seen running on the field and celebrating with the crowd. The video ends with a shot of the player wearing a red and blue uniform, who is seen running on the field and celebrating with the crowd.", + "tokens_per_second": 9.100198479728341, + "peak_gpu_memory_mb": 4498.375, + "inference_time": 14.894975662231445, + "cpu_usage": 0.0, + "cpu_core_utilization": [ + 0.0, + 0.0, + 0.0, + 0.0 + ] + } +} \ No newline at end of file diff --git a/API_Transformers/outputs/Qwen2-VL-2B-Instruct-AWQ/20250818_223603.json b/API_Transformers/outputs/Qwen2-VL-2B-Instruct-AWQ/20250818_223603.json new file mode 100644 index 0000000000000000000000000000000000000000..8b9f6d98487727b60157bf1582aa1aa1d7d1e807 --- /dev/null +++ b/API_Transformers/outputs/Qwen2-VL-2B-Instruct-AWQ/20250818_223603.json @@ -0,0 +1,15 @@ +{ + "messi_part_001.mp4": { + "response": "The video captures a soccer match between two teams, with the audience cheering and the players celebrating. 
The video features a player wearing a red and blue uniform, who is seen running on the field and celebrating with the crowd. The video also shows a player wearing a blue and white uniform, who is seen running on the field and celebrating with the crowd. The video ends with a shot of the player wearing a red and blue uniform, who is seen running on the field and celebrating with the crowd.", + "tokens_per_second": 9.058665203909582, + "peak_gpu_memory_mb": 4498.375, + "inference_time": 14.381455659866333, + "cpu_usage": 64.5, + "cpu_core_utilization": [ + 61.5, + 64.5, + 60.3, + 71.7 + ] + } +} \ No newline at end of file diff --git a/API_Transformers/outputs/lfm/lfm_30s.json b/API_Transformers/outputs/lfm/lfm_30s.json new file mode 100644 index 0000000000000000000000000000000000000000..9193e49dcb3934417899e132d81365411e289a36 --- /dev/null +++ b/API_Transformers/outputs/lfm/lfm_30s.json @@ -0,0 +1,1318 @@ +{ + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_001.mp4": { + "response": "The video shows a soccer match between FC Barcelona and Malaga. The match took place on January 23, 2016, and was part of La Liga. The video appears to be a freeze frame from a soccer game, with the scoreboard visible in the background. The scoreboard shows that FC Barcelona is currently leading with a score of 1-0. The video is presented by WOUVA.", + "tokens_per_second": 39.115405931589194, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 84, + "inference_time": 20.53607702255249, + "cpu_usage": 66.5, + "cpu_core_utilization": [ + 64.7, + 59.3, + 65.1, + 76.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_002.mp4": { + "response": "The image shows a soccer match between La Liga team La Liga and Valencia. The match took place on August 14, 2010, and was part of the 99th season of La Liga. The teams are competing on a green field, with players from both teams actively engaged in the game. 
The scoreboard indicates that La Liga won the match 2-0. The image captures the intensity and excitement of the match, with players running, kicking the ball, and competing for possession.", + "tokens_per_second": 41.680290716366436, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 102, + "inference_time": 26.451772451400757, + "cpu_usage": 81.6, + "cpu_core_utilization": [ + 79.6, + 82.3, + 82.8, + 81.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_003.mp4": { + "response": "This image is a screenshot from a soccer match between Barcelona and Getafe, which took place on April 23, 2014. The match was part of the La Liga competition, and it was played at the Estadio Camp Nou in Barcelona. The screenshot shows a moment during the game where a player from Barcelona, wearing a blue and red striped jersey, is about to kick the ball towards the goal. The goalkeeper from Getafe is in the net, trying to block the shot. The scoreboard in the image indicates that the score is 0-0, and the match is in the 98th minute. The image also includes some text at the bottom, which reads \"98 La Liga 23.04.2014 vs Getafe\", confirming the date and teams playing.", + "tokens_per_second": 40.298898666280635, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 165, + "inference_time": 26.20504403114319, + "cpu_usage": 81.7, + "cpu_core_utilization": [ + 78.7, + 80.5, + 83.6, + 83.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_004.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Atletico Madrid. The player wearing the number 10 jersey, who is likely Lionel Messi, is about to score a goal. The ball is in mid-air, and the goalkeeper is in the background, ready to make a save. The match is taking place in a stadium filled with spectators. 
The image captures a crucial moment in the game, with the outcome hanging in the balance.", + "tokens_per_second": 40.64591219302388, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 94, + "inference_time": 25.52985715866089, + "cpu_usage": 77.0, + "cpu_core_utilization": [ + 74.3, + 78.8, + 81.3, + 73.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_005.mp4": { + "response": "The image shows a soccer match in progress, with players from two teams on the field. One team is wearing purple and pink striped jerseys, while the other team is in white jerseys. The players are actively engaged in the game, with some running and others in various positions on the field. The image appears to be a screenshot from a broadcast or a video, as there is a timestamp visible at the bottom of the screen. The text \"96. Club World Cup 18/12/2011 vs Santos\" is also displayed, indicating the date and the teams playing in the match.", + "tokens_per_second": 40.392638063278525, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 125, + "inference_time": 26.59394097328186, + "cpu_usage": 78.7, + "cpu_core_utilization": [ + 82.9, + 76.9, + 73.8, + 81.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_006.mp4": { + "response": "The image shows a soccer match between Real Madrid and Barcelona, with the score currently at 0-0. The match is taking place in the 95th minute of the game, as indicated by the text \"95. La Liga 2012/2013 vs Cordoba\" at the bottom of the image. The players are on the field, with one team wearing blue and red striped jerseys and the other team wearing white jerseys. The goalkeeper is visible in the background, and the stadium is filled with spectators. 
The image appears to be a screenshot from a broadcast or a video recording of the match.", + "tokens_per_second": 39.74288596804158, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 131, + "inference_time": 27.094712257385254, + "cpu_usage": 78.7, + "cpu_core_utilization": [ + 75.1, + 72.3, + 82.9, + 84.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_007.mp4": { + "response": "The image shows a soccer match between Real Madrid and AS Roma, with the score currently at 0-0. The match is taking place in the Champions League on April 23, 2015. The players are on the field, with one team wearing white uniforms and the other team wearing blue and red striped uniforms. The goalkeeper is visible in the foreground, wearing a green uniform. The image captures the intensity and excitement of the match, with the players focused on the game and the crowd watching intently.", + "tokens_per_second": 41.13905098824316, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 106, + "inference_time": 26.659747838974, + "cpu_usage": 82.0, + "cpu_core_utilization": [ + 73.4, + 81.4, + 92.3, + 80.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_008.mp4": { + "response": "The image shows a soccer match between Real Madrid and Atletico Madrid. The match took place on April 23, 2017, as indicated by the text \"93 La Liga 24.09.2017 vs Atletico Madrid\" at the bottom of the image. The players are on the field, with one team wearing blue and red striped jerseys and the other team wearing white jerseys. The goalkeeper is in the goal, and the ball is in the air, suggesting an ongoing play. 
The stadium is filled with spectators, creating an atmosphere of excitement and anticipation for the match.", + "tokens_per_second": 40.618019438790945, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 127, + "inference_time": 25.055734395980835, + "cpu_usage": 78.6, + "cpu_core_utilization": [ + 76.3, + 76.6, + 77.4, + 84.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_009.mp4": { + "response": "This image shows a soccer match between La Liga team La Liga and Atletico Madrid. The match took place on September 24, 2011, and was part of the 2010-2011 season. The scoreboard indicates that La Liga won the match 2-0 against Atletico Madrid. The players on the field are wearing their respective team uniforms, with La Liga players in blue and red striped jerseys and Atletico Madrid players in blue jerseys. The image captures a moment during the game, with players actively engaged in the match.", + "tokens_per_second": 40.50383986283677, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 119, + "inference_time": 24.166887998580933, + "cpu_usage": 79.8, + "cpu_core_utilization": [ + 80.5, + 76.1, + 81.0, + 81.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_010.mp4": { + "response": "The image shows a soccer match between La Liga teams Real Madrid and Celta de Vigo. The match took place on September 11, 2018, and ended with a score of 2-0 in favor of Real Madrid. The image captures a moment during the game, with players on the field and a crowd in the background. 
The scoreboard at the bottom of the image displays the match details, including the date, teams, and score.", + "tokens_per_second": 38.89654933093534, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 93, + "inference_time": 24.5272536277771, + "cpu_usage": 78.8, + "cpu_core_utilization": [ + 72.9, + 80.8, + 84.5, + 77.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_011.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Sporting Gijon, with the score currently at 0-0. The match is taking place on February 17, 2012, and it's the 89th match of the La Liga season. The players are on the field, with one team wearing blue and white striped jerseys and the other team wearing red and blue striped jerseys. The image captures a moment of excitement and anticipation as the players prepare to kick off the match.", + "tokens_per_second": 39.596664910065556, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 106, + "inference_time": 25.26091170310974, + "cpu_usage": 82.3, + "cpu_core_utilization": [ + 85.2, + 80.6, + 78.8, + 84.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_012.mp4": { + "response": "The image shows a soccer match between Malaga and Villalreal, with the score currently at 0-0. The match is taking place on February 5, 2015, and is being broadcast on WOWA TV. The players on the field are wearing blue and red striped jerseys, representing their respective teams. The goalkeeper is wearing a green jersey, indicating that he is the goalkeeper for Villalreal. The image captures a moment of action, with one player in mid-air, possibly attempting to head the ball or make a save. 
The crowd in the background is blurred, suggesting that the focus is on the players and the match itself.", + "tokens_per_second": 37.65531289972925, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 140, + "inference_time": 26.731486558914185, + "cpu_usage": 81.6, + "cpu_core_utilization": [ + 77.8, + 81.0, + 87.1, + 80.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_013.mp4": { + "response": "This image shows a soccer match between Real Betis and Villarreal, taking place on February 12, 2012. The match is being broadcast on WOWA TV channel 66. The scoreboard indicates that Villarreal is currently leading with a score of 1-0. The players on the field are wearing their respective team uniforms, with Real Betis players in blue and white striped jerseys, and Villarreal players in purple jerseys. The image captures a moment of action on the field, with players from both teams engaged in the game.", + "tokens_per_second": 41.228258805708954, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 120, + "inference_time": 26.693491458892822, + "cpu_usage": 77.5, + "cpu_core_utilization": [ + 80.0, + 75.9, + 74.8, + 79.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_014.mp4": { + "response": "The image shows a soccer match between Real Betis and Lleida, with the score currently tied at 0-0. The match is taking place on September 12, 2012, and is part of the 85th Champions League game. The players are on the field, with one team wearing blue and red uniforms and the other team wearing white uniforms. 
The image captures a moment of excitement and anticipation as the game continues.", + "tokens_per_second": 41.332802503833406, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 90, + "inference_time": 25.003145456314087, + "cpu_usage": 77.5, + "cpu_core_utilization": [ + 79.1, + 64.7, + 75.0, + 91.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_015.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Huesca, with the score currently at 0-0. The match is taking place in the 85th minute of the game, as indicated by the text at the bottom of the screen. The players are on the field, with one team wearing blue and red striped jerseys and the other team wearing white jerseys. The stadium is filled with spectators, creating a lively atmosphere for the match.", + "tokens_per_second": 36.78989463023373, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 97, + "inference_time": 26.93568253517151, + "cpu_usage": 80.8, + "cpu_core_utilization": [ + 84.7, + 70.3, + 74.8, + 93.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_016.mp4": { + "response": "The image shows a soccer match between Real Valladolid and La Liga team, with the score displayed as 82-82. The match took place on October 27, 2019. The image appears to be a screenshot from a video, as indicated by the \"WOUNA WWW\" watermark in the top right corner. The scene captures a moment on the field, with players from both teams in action. The goalkeeper is visible in the background, and the crowd can be seen in the stands. 
The image is slightly blurry, suggesting it was captured from a moving camera or during a fast-paced moment in the game.", + "tokens_per_second": 40.060213000999134, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 133, + "inference_time": 24.30997395515442, + "cpu_usage": 75.3, + "cpu_core_utilization": [ + 72.0, + 74.7, + 80.4, + 74.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_017.mp4": { + "response": "This image shows a soccer match between FC Barcelona and Real Valladolid. The match took place on October 25, 2019, and was part of La Liga. The score at the time of the image was 82-82, indicating a tied game. The players are wearing their respective team jerseys, with Barcelona in blue and pink checkered patterns and Real Valladolid in white. The image captures a moment of action on the field, with players from both teams engaged in play.", + "tokens_per_second": 36.93354278794345, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 103, + "inference_time": 26.13444471359253, + "cpu_usage": 83.1, + "cpu_core_utilization": [ + 84.8, + 76.8, + 77.8, + 93.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_018.mp4": { + "response": "The image shows a soccer match between Malaga and Real Madrid, with the score currently at 0-0. The match is taking place at the Estadio de la Coru\u00f1a in Malaga, Spain. The players are in action on the field, with one player from each team in the middle of the action. The image is a screenshot from a broadcast, with the text \"81 Copa Rey 16/05/2005 vs Malaga\" displayed at the bottom, indicating the date and teams playing. 
The broadcast logo \"WOUYA\" is visible in the top right corner.", + "tokens_per_second": 36.78504412759604, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 122, + "inference_time": 25.16861057281494, + "cpu_usage": 79.4, + "cpu_core_utilization": [ + 76.1, + 67.9, + 81.4, + 92.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_019.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Real Betis, with the score currently at 0-0. The match is taking place in the La Liga, which is a professional football league in Spain. The players are on the field, and the crowd is visible in the background. The image appears to be a screenshot from a video, as there is a timestamp at the bottom of the screen.", + "tokens_per_second": 40.30796192039967, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 84, + "inference_time": 25.524508476257324, + "cpu_usage": 81.2, + "cpu_core_utilization": [ + 84.0, + 77.4, + 78.2, + 85.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_020.mp4": { + "response": "The image shows a soccer match between Real Betis and La Liga team, with the score currently at 0-0. The match took place on February 23, 2019, and the final score was 2-2. The image appears to be a screenshot from a video, as there is a timestamp at the bottom of the screen. The match was played at the Estadio de la Coru\u00f1a in Betis, Spain.", + "tokens_per_second": 38.74070756902105, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 91, + "inference_time": 24.27034592628479, + "cpu_usage": 80.2, + "cpu_core_utilization": [ + 80.6, + 69.2, + 75.8, + 95.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_021.mp4": { + "response": "The image shows a soccer match between La Liga team 78 and Sevilla, taking place on February 23, 2019. The match is part of the 77th Club World Cup. 
The players are on the field, with one team wearing purple and yellow uniforms and the other team in white uniforms. The scoreboard at the bottom of the image indicates that the match is currently 3-0 in favor of La Liga team 78. The image appears to be a screenshot from a broadcast or a video recording of the match.", + "tokens_per_second": 41.32099005919523, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 110, + "inference_time": 25.01966094970703, + "cpu_usage": 80.5, + "cpu_core_utilization": [ + 79.4, + 80.7, + 84.1, + 77.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_022.mp4": { + "response": "The image shows a soccer player, Lionel Messi, wearing a pink jersey. He is in the middle of a soccer match, likely celebrating a goal or a victory. The text on the image indicates that this is a match from the 2014-2015 season of the La Liga, which is a professional soccer league in Spain. The match was between Real Madrid and another team, and it took place on April 23, 2015. The player's jersey has the logo of Qatar Airways, which is a sponsor of the team.", + "tokens_per_second": 41.598046238624825, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 114, + "inference_time": 24.734081983566284, + "cpu_usage": 82.0, + "cpu_core_utilization": [ + 85.4, + 74.6, + 76.6, + 91.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_023.mp4": { + "response": "The image shows a soccer match between Real Madrid and Olympique Lyon, taking place on March 4, 2017. The match is part of the 75th Champions League, as indicated by the text at the bottom of the image. The players are on a green field, with one team wearing white and the other team wearing orange. The scoreboard at the bottom of the image shows that the score is 0-0. 
The image appears to be a screenshot from a broadcast or a video, as there is a watermark in the top right corner that says \"WOUVA WWW\".", + "tokens_per_second": 40.963503525455884, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 124, + "inference_time": 25.894067525863647, + "cpu_usage": 77.9, + "cpu_core_utilization": [ + 75.3, + 79.5, + 79.3, + 77.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_024.mp4": { + "response": "This image shows a soccer match between Olympique Lyonnais and Barcelona, with the score currently 0-0. The match is taking place in the 75th minute of the game on March 13, 2019. The players on the field are wearing their respective team jerseys, with Barcelona players in blue and red striped jerseys and Olympique Lyonnais players in orange jerseys. The image captures a moment of action on the field, with one player from each team in the midst of play. The scoreboard at the bottom of the image confirms the current score and the time of the match.", + "tokens_per_second": 39.38300814479115, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 130, + "inference_time": 25.453104734420776, + "cpu_usage": 78.2, + "cpu_core_utilization": [ + 81.7, + 80.8, + 73.0, + 77.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_025.mp4": { + "response": "The image shows a soccer match between Colombia and another team, likely Spain, during the 73rd World Cup Qualification match on 21st November 2007. The Colombian team is wearing yellow jerseys, while the opposing team is in blue. The match is taking place in a large stadium with a crowd of spectators in the background. 
The image appears to be a screenshot from a television broadcast, with the text \"73 World Cup Qualification 21/11/2007 vs Colombia\" displayed at the bottom.", + "tokens_per_second": 40.238911139014256, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 110, + "inference_time": 26.95945429801941, + "cpu_usage": 78.2, + "cpu_core_utilization": [ + 76.9, + 69.2, + 76.6, + 89.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_026.mp4": { + "response": "The image shows a soccer match between Atletico Madrid and Real Madrid, which took place on March 1, 2008. The match was part of the 72nd La Liga season. The players are seen on the field, with some wearing yellow jerseys and others in blue and white striped jerseys. The scoreboard indicates that the match ended in a 1-1 draw. The image captures the intensity and excitement of the game, with players actively engaged in the match.", + "tokens_per_second": 40.17368557750544, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 103, + "inference_time": 25.94055485725403, + "cpu_usage": 78.3, + "cpu_core_utilization": [ + 74.2, + 81.6, + 81.0, + 76.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_027.mp4": { + "response": "The image shows a soccer match between Real Madrid and Malaga, taking place on August 23, 2012. The match is being broadcast on WOWA TV channel. The scoreboard indicates that Real Madrid is leading with a score of 1-0. The players on the field are wearing their respective team uniforms, with Real Madrid in blue and Malaga in white. The image captures a moment during the game, with the ball in play and players from both teams positioned around it. 
The atmosphere appears to be intense, with the players focused on the game and the spectators watching with anticipation.", + "tokens_per_second": 39.38549335675344, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 123, + "inference_time": 26.074825525283813, + "cpu_usage": 78.8, + "cpu_core_utilization": [ + 80.6, + 60.0, + 77.3, + 97.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_028.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Malaga, with the score currently at 0-0. The match is taking place on August 4, 2015, at the La Masia stadium. The players are on the field, with one team wearing blue and red striped jerseys and the other team wearing white jerseys. The image is a screenshot from a broadcast, with the text \"69 La Masia 08/04/2015 vs Malaga\" displayed at the bottom.", + "tokens_per_second": 37.358557004221744, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 108, + "inference_time": 25.992740631103516, + "cpu_usage": 81.2, + "cpu_core_utilization": [ + 82.6, + 72.3, + 80.2, + 89.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_029.mp4": { + "response": "The image shows a soccer match between two teams, one wearing blue and red striped jerseys and the other in white jerseys. The match is taking place on a green field with white lines marking the boundaries. The players are actively engaged in the game, with one player in possession of the ball and others positioned around him. The scoreboard at the bottom of the image indicates that the match is in the 69th minute, with a score of 0-0 between the two teams. 
The image captures the intensity and excitement of a live soccer match.", + "tokens_per_second": 40.02365392147096, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 116, + "inference_time": 25.656752347946167, + "cpu_usage": 80.4, + "cpu_core_utilization": [ + 78.8, + 83.4, + 81.6, + 77.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_030.mp4": { + "response": "The image shows a soccer match between Rayo Vallecano and La Liga team Real Madrid. The scoreboard indicates that it's the 68th minute of the game, and the final score is 2-1 in favor of Real Madrid. The players are on the field, with one player from Real Madrid, Alexis, celebrating a goal. The scoreboard also displays the date of the match as 23/11/2011.", + "tokens_per_second": 40.11205537436322, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 89, + "inference_time": 26.13630247116089, + "cpu_usage": 83.8, + "cpu_core_utilization": [ + 81.8, + 83.3, + 83.5, + 86.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_031.mp4": { + "response": "The image shows a soccer match between La Liga teams La Habra and Tenerife. The match is taking place in the 2010-2011 season and is part of a game against Espanyol. The scoreboard in the image shows that La Habra is currently leading with a score of 1-0. The players on the field are wearing blue and white striped jerseys, while the players in the opposing team are wearing pink jerseys. 
The image captures a moment during the game, with the ball in play and players from both teams positioned around the field.", + "tokens_per_second": 43.732303463922946, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 120, + "inference_time": 23.92936396598816, + "cpu_usage": 47.0, + "cpu_core_utilization": [ + 37.9, + 27.6, + 39.6, + 83.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_032.mp4": { + "response": "The image shows a soccer match between Real Madrid and Barcelona during the 2010 Champions League final. The scoreboard at the bottom of the screen indicates that the match ended in a 1-1 draw. The players are seen running on the field, with the goalkeeper in a yellow jersey diving to make a save. The scoreboard also displays the date of the match, which is 17th August 2010.", + "tokens_per_second": 41.1306443391851, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 88, + "inference_time": 22.845361948013306, + "cpu_usage": 48.6, + "cpu_core_utilization": [ + 40.5, + 38.9, + 83.4, + 31.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_033.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Sevilla, taking place on February 28, 2016. The match is part of the La Liga competition, as indicated by the text at the bottom of the image. The players are on the field, with one team wearing blue and red striped jerseys, while the other team is in white jerseys. The scoreboard in the image shows that the score is 0-0, indicating that the match is still in its early stages. 
The image captures the intensity and excitement of the game, with the players focused on the ball and the crowd cheering in the background.", + "tokens_per_second": 39.2482211811343, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 131, + "inference_time": 26.371907472610474, + "cpu_usage": 79.9, + "cpu_core_utilization": [ + 70.7, + 79.5, + 91.0, + 78.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_034.mp4": { + "response": "This image is a screenshot from a soccer match between Argentina and Uruguay during the 62nd World Cup Qualification match on October 13, 2012. The match was between Uruguay and Argentina, with Uruguay winning 2-1. The image shows a player from Argentina celebrating a goal, while Uruguay's players are huddled together in the background. The scoreboard at the top of the image shows the score as 2-1 in favor of Uruguay. The image is from a broadcast by WOWA TV, as indicated by the logo in the top right corner.", + "tokens_per_second": 43.710601210811305, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 119, + "inference_time": 25.703685760498047, + "cpu_usage": 50.7, + "cpu_core_utilization": [ + 57.0, + 41.9, + 65.6, + 38.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_035.mp4": { + "response": "The image shows a soccer match between Real Madrid and Almer\u00eda, with the score at 1-1. The match took place on December 28, 2013, and was broadcast on WOWA TV. The players are on the field, with one player from each team visible. 
The scoreboard at the bottom of the screen shows the score and date.", + "tokens_per_second": 43.34082730242674, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 76, + "inference_time": 21.512945890426636, + "cpu_usage": 44.1, + "cpu_core_utilization": [ + 24.8, + 29.9, + 95.2, + 26.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_036.mp4": { + "response": "The image shows a group of soccer players celebrating on a field. They are wearing blue and red striped jerseys, which are the colors of FC Barcelona. The players are embracing each other and hugging, expressing their joy and excitement. The image is from a match between FC Barcelona and Almeria, which took place on September 28, 2013. The players are shown in a moment of triumph, likely after scoring a goal or winning the match. The atmosphere is one of camaraderie and sportsmanship, with the players sharing a collective sense of achievement.", + "tokens_per_second": 43.305425094512614, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 121, + "inference_time": 23.016352653503418, + "cpu_usage": 52.2, + "cpu_core_utilization": [ + 40.0, + 54.3, + 38.4, + 75.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_037.mp4": { + "response": "The image shows a screenshot from a soccer match between La Liga teams Real Madrid and Espanyol. The match took place on July 12, 2014, and ended with a score of 1-1. The screenshot captures a moment during the game, with players from both teams on the field. The scoreboard at the bottom of the screen indicates the final score and the date of the match. 
The image also includes a watermark from the channel \"WOUYA\" in the top right corner.", + "tokens_per_second": 43.2809732947507, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 106, + "inference_time": 22.871727228164673, + "cpu_usage": 57.7, + "cpu_core_utilization": [ + 43.8, + 51.8, + 47.4, + 87.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_038.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Alaves, with the score currently at 0-0. The match is taking place in the La Liga, which is a professional football league in Spain. The players are on the field, and the ball is in play. The image captures a moment of intense competition between the two teams.", + "tokens_per_second": 44.07200758227364, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 70, + "inference_time": 21.80135941505432, + "cpu_usage": 45.3, + "cpu_core_utilization": [ + 32.1, + 53.0, + 27.2, + 68.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_039.mp4": { + "response": "The image shows a soccer match between La Liga teams La Masia and Alav\u00e9s. The match took place on December 21, 2019, and was part of the 57th La Liga season. The scoreboard indicates that La Masia won the match with a score of 2-0 against Alav\u00e9s. The players on the field are wearing their respective team colors, with La Masia in blue and red striped jerseys, and Alav\u00e9s in white jerseys. 
The image captures a moment of action during the game, with players from both teams competing for the ball.", + "tokens_per_second": 43.32052884810036, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 125, + "inference_time": 23.634844303131104, + "cpu_usage": 45.4, + "cpu_core_utilization": [ + 36.6, + 29.6, + 80.9, + 34.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_040.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Eliar, with the score currently at 55-55. The match is taking place on September 2, 2020, and it's a Champions League game. The players are on the field, with some in blue and red striped jerseys representing Barcelona, and others in green jerseys representing Eliar. The image appears to be a screenshot from a broadcast, with the text \"56. Champions League 14.09.2010 vs Panathinaikos\" visible at the bottom.", + "tokens_per_second": 43.58098238207414, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 116, + "inference_time": 22.50316596031189, + "cpu_usage": 43.7, + "cpu_core_utilization": [ + 29.7, + 54.7, + 25.5, + 64.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_041.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Valencia, with the score currently at 55-54 in favor of Valencia. The match is taking place on February 2, 2019, and is part of La Liga. The players are on the field, with one team wearing blue and pink striped jerseys and the other team in white jerseys. The image appears to be a screenshot from a video, as there are black bars at the top and bottom of the image. The text \"55. 
La Liga 02/02/2019 vs Valencia\" is displayed at the bottom of the image.", + "tokens_per_second": 43.43359338437597, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 129, + "inference_time": 22.663415670394897, + "cpu_usage": 42.2, + "cpu_core_utilization": [ + 50.8, + 51.4, + 39.2, + 27.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_042.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Atletico Madrid. The match took place on February 2, 2019, and was part of La Liga. In the image, Lionel Messi, wearing a Barcelona jersey with the number 10, is seen celebrating with his teammates after scoring a goal. The scoreboard in the image shows that Barcelona won the match 3-0. The players are on the field, and the crowd is visible in the background. The image captures a moment of triumph for Barcelona and their players.", + "tokens_per_second": 43.5451659695564, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 114, + "inference_time": 23.548773527145386, + "cpu_usage": 45.3, + "cpu_core_utilization": [ + 28.8, + 62.1, + 31.5, + 58.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_043.mp4": { + "response": "The image shows a soccer match between Real Madrid and Atletico Madrid. The scoreboard indicates that the match is taking place on 24th September 2011, and it's the 53rd minute of the game. The teams are playing on a green field, with players from both teams visible in the image. The scoreboard also displays the logos of the teams and sponsors, including Real Madrid and Atletico Madrid. 
The image appears to be a screenshot from a television broadcast of the match.", + "tokens_per_second": 43.72216137040779, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 105, + "inference_time": 22.736104011535645, + "cpu_usage": 45.6, + "cpu_core_utilization": [ + 38.4, + 38.5, + 74.5, + 31.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_044.mp4": { + "response": "The image shows a soccer match in progress, with players from two teams competing on the field. One team is wearing blue and red striped jerseys, while the other team is in white jerseys. The players are actively engaged in the game, with one player in possession of the ball, preparing to make a play. The match appears to be taking place in a stadium, with spectators visible in the background. The image captures the intensity and excitement of a live soccer match.", + "tokens_per_second": 43.69812721151038, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 100, + "inference_time": 22.36645793914795, + "cpu_usage": 43.8, + "cpu_core_utilization": [ + 24.1, + 76.7, + 32.2, + 41.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_045.mp4": { + "response": "The image shows a soccer match between La Liga teams La Coruna and Sevilla. The match took place on October 30, 2010, and was broadcast on WOWA TV. The scoreboard indicates that La Coruna won the match with a score of 1-0. The players on the field are wearing their respective team jerseys, with La Coruna in white and Sevilla in blue and red striped jerseys. The image captures a moment during the game, with one player from La Coruna controlling the ball and another player from Sevilla nearby. 
The background shows the green soccer field and some spectators in the stands.", + "tokens_per_second": 43.60305109768778, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 132, + "inference_time": 22.685951948165894, + "cpu_usage": 43.3, + "cpu_core_utilization": [ + 52.4, + 45.1, + 48.5, + 27.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_046.mp4": { + "response": "The image shows a soccer match between La Liga teams La Liga 08/03/2013 and Deportivo La Conru\u00f1a. The match took place on August 3, 2013. The players are seen on the field, with some wearing blue and red jerseys, while others are in white jerseys. The goalkeeper is in front of the goal, ready to defend. The image captures the intensity and excitement of the game, with players actively engaged in the match.", + "tokens_per_second": 43.843882768658645, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 100, + "inference_time": 22.802679538726807, + "cpu_usage": 45.2, + "cpu_core_utilization": [ + 27.3, + 27.9, + 94.3, + 31.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_047.mp4": { + "response": "This image captures a moment from a soccer match between Real Madrid and Atletico Madrid, which took place on February 6, 2011. The scoreboard at the bottom of the screen shows that Real Madrid is leading with a score of 48, while Atletico Madrid has a score of 10. The match is being played at the Wanda Metropolitano stadium in Madrid, Spain. The image appears to be a screenshot from a broadcast or a video recording of the game.", + "tokens_per_second": 43.798575227467595, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 102, + "inference_time": 22.23618173599243, + "cpu_usage": 44.4, + "cpu_core_utilization": [ + 44.4, + 29.2, + 75.3, + 28.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_048.mp4": { + "response": "The image shows a soccer match between Real Madrid and Atletico Madrid. 
The scoreboard indicates that it's the 47th minute of the game, and Real Madrid is leading 1-0. The players are on the field, with one team wearing green jerseys and the other team wearing white jerseys. The match is part of the Spanish Super Cup, which took place on August 29, 2012. The image captures a moment of excitement and tension as Real Madrid takes the lead in the game.", + "tokens_per_second": 43.89402793517226, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 108, + "inference_time": 22.3653507232666, + "cpu_usage": 44.3, + "cpu_core_utilization": [ + 86.0, + 31.9, + 30.7, + 28.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_049.mp4": { + "response": "The image shows a soccer match between Brazil and Argentina, with the score currently at 45-45. The match is being played on November 17, 2010, and it's a friendly game between the two teams. The players are wearing their respective team jerseys, with the Brazilian team in blue and white striped jerseys, and the Argentine team in yellow jerseys. The image captures a moment of celebration or disappointment, as the players are huddled together on the field. The scoreboard at the bottom of the image shows the current score and the date of the match.", + "tokens_per_second": 43.56379224518244, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 124, + "inference_time": 22.528496026992798, + "cpu_usage": 43.2, + "cpu_core_utilization": [ + 31.4, + 85.1, + 26.9, + 29.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_050.mp4": { + "response": "The image shows a soccer match between two teams, one wearing blue and white striped jerseys and the other in pink and blue jerseys. The match is taking place on a green field with a goalpost visible in the background. The scoreboard at the bottom of the image indicates that the match is between \"La Liga\" and \"Deportivo La Conruana\" on July 17, 2008. 
The image appears to be a screenshot from a broadcast or a video recording of the match.", + "tokens_per_second": 43.61928425153152, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 107, + "inference_time": 22.562195301055908, + "cpu_usage": 44.0, + "cpu_core_utilization": [ + 24.9, + 65.1, + 31.4, + 54.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_051.mp4": { + "response": "The image shows a soccer match between La Liga team La Coruna and another team, likely from a different league. The match is taking place on July 17, 2008, as indicated by the text \"44. La Coruna 17/07/2008 vs Deportivo La Coruna\" at the bottom of the image. The players are on a green field, with one team wearing blue and white striped jerseys and the other team wearing yellow jerseys. The goalkeeper is visible in the background, wearing a green jersey. The image appears to be a screenshot from a video or broadcast of the match.", + "tokens_per_second": 42.97388508982766, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 131, + "inference_time": 23.273664474487305, + "cpu_usage": 44.6, + "cpu_core_utilization": [ + 52.0, + 43.1, + 56.1, + 27.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_052.mp4": { + "response": "The image shows a soccer match in progress, with players from two teams competing on the field. One team is wearing yellow uniforms, while the other team is wearing red and white striped uniforms. The match appears to be taking place in a stadium, with spectators visible in the background. The image is likely a screenshot from a broadcast or a video recording of the game. 
The text \"La Liga 01/12/2019 vs Atletico Madrid\" is visible at the bottom of the image, indicating that the match is part of the La Liga league and is being played against Atletico Madrid.", + "tokens_per_second": 43.800756217355975, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 125, + "inference_time": 22.562139749526978, + "cpu_usage": 43.4, + "cpu_core_utilization": [ + 38.0, + 28.5, + 79.2, + 27.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_053.mp4": { + "response": "The image shows a soccer match between Real Madrid and Atletico Madrid. The scoreboard indicates that the match is taking place on July 12, 2019, and it's the 42nd minute of play. The teams are lined up on the field, ready to start the game. The image captures the anticipation and excitement of the upcoming match.", + "tokens_per_second": 43.63552451411287, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 73, + "inference_time": 21.77809429168701, + "cpu_usage": 44.8, + "cpu_core_utilization": [ + 64.1, + 30.4, + 56.3, + 28.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_054.mp4": { + "response": "The image shows a soccer match between Real Madrid and La Liga team, with the score displayed as 41-0 in favor of Real Madrid. The match took place on July 7, 2012, and was part of the Champions League 2012-2013. The image appears to be a screenshot from a broadcast, with the text \"WOUWA WWW\" visible in the top right corner. 
The image is somewhat blurry, making it difficult to see the players' faces clearly.", + "tokens_per_second": 43.636680260959196, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 104, + "inference_time": 22.278985261917114, + "cpu_usage": 44.5, + "cpu_core_utilization": [ + 52.8, + 60.9, + 26.8, + 37.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_055.mp4": { + "response": "The image shows a soccer match between Real Madrid and Barcelona, with the score currently at 1-1. The match is taking place in a large stadium filled with spectators. The players are wearing their respective team jerseys, with Real Madrid in white and Barcelona in blue and red. The image appears to be a screenshot from a broadcast or a video, as there is a timestamp and a logo visible in the corner. The text \"39 biga 10/03/2007 vs Real Madrid\" suggests that this is a match from the 2007 season.", + "tokens_per_second": 43.44007709060384, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 118, + "inference_time": 23.51341676712036, + "cpu_usage": 45.3, + "cpu_core_utilization": [ + 30.5, + 45.8, + 29.1, + 75.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_056.mp4": { + "response": "The image shows a soccer player celebrating on the field. The player is wearing a blue jersey with the number 10 on it. The player is surrounded by other players, some of whom are also celebrating. The image appears to be a still from a soccer match, possibly between two teams with blue and white jerseys. The player in the blue jersey is likely a key player for his team, as the number 10 is often assigned to a star striker or playmaker. 
The celebration on the field suggests that the team has just scored a goal or won the match.", + "tokens_per_second": 43.73466548585687, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 119, + "inference_time": 22.740065813064575, + "cpu_usage": 44.0, + "cpu_core_utilization": [ + 47.8, + 31.9, + 71.2, + 24.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_057.mp4": { + "response": "The image shows a soccer match between the United States and the United States men's national team, which is part of the 2014 FIFA World Cup. The match took place on April 17, 2016, at the Estadio Azteca in Mexico City. The scoreboard indicates that the United States won the match 3-0, with goals scored by David Villa, Zinedine Zidane, and Luis Suarez. The image captures a moment during the game, with players from both teams on the field, and the crowd in the background cheering. The atmosphere is electric, with the excitement of the fans and the intensity of the players on the field.", + "tokens_per_second": 43.44122225548033, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 140, + "inference_time": 23.453000783920288, + "cpu_usage": 44.5, + "cpu_core_utilization": [ + 81.3, + 36.1, + 31.2, + 29.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_058.mp4": { + "response": "The image shows a soccer match between two teams, one wearing blue and white striped jerseys and the other in green jerseys. The match is taking place on a green field with white lines marking the boundaries. The players are actively engaged in the game, with one player in the foreground kicking the ball towards the goal. The scoreboard in the corner indicates that the team in blue and white striped jerseys is leading with a score of 2-0. The match is part of the 36th round of the World Cup, with the date being May 25, 2018. 
The teams are Nigeria and another team with a different jersey color scheme.", + "tokens_per_second": 43.24753914054676, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 141, + "inference_time": 23.695804119110107, + "cpu_usage": 44.8, + "cpu_core_utilization": [ + 29.3, + 80.1, + 29.0, + 40.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_059.mp4": { + "response": "This image is a screenshot from a soccer match between Argentina and Nigeria during the 2018 World Cup. The scoreboard at the bottom of the screen shows that Argentina is leading 10-0. The match is taking place on May 25, 2018, and it's the 35th Champions League game. The image captures a moment of Argentina's dominance, with a player from Argentina running towards the goal. The screenshot also includes a comment from a viewer who says \"Comment: Messi is the best. If you watched the video, you can see it here.\" The image is from a broadcast by WOWA TV.", + "tokens_per_second": 43.41159506336397, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 132, + "inference_time": 23.600623607635498, + "cpu_usage": 45.4, + "cpu_core_utilization": [ + 41.3, + 45.2, + 28.8, + 66.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_060.mp4": { + "response": "The image shows a soccer match between Villarreal and Lleida, with the score currently at 0-0. The match is taking place in the 34th minute of the game. The players are on the field, with some in yellow jerseys and others in purple and blue striped jerseys. The goalkeeper is visible in the goal area, and the ball is in play. 
The image appears to be a screenshot from a broadcast or a video recording of the match.", + "tokens_per_second": 42.664462930711395, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 103, + "inference_time": 22.689029693603516, + "cpu_usage": 45.6, + "cpu_core_utilization": [ + 51.6, + 29.2, + 67.2, + 34.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_061.mp4": { + "response": "This image is a screenshot from a soccer match between FC Barcelona and Bayer Leverkusen. The match took place on July 3, 2019, in La Liga. The screenshot shows a moment during the game where a player from FC Barcelona is in possession of the ball, preparing to make a play. The scoreboard at the bottom of the screen indicates that the score is 0-0, and the match is in the 33rd minute. The image captures the intensity and excitement of the game, with players from both teams on the field.", + "tokens_per_second": 43.51970989323823, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 115, + "inference_time": 23.804783582687378, + "cpu_usage": 47.8, + "cpu_core_utilization": [ + 49.2, + 36.0, + 68.3, + 37.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_062.mp4": { + "response": "The image shows a soccer match in progress, with a player in a blue and red striped jersey, likely the goalkeeper, positioned in front of the goal. The player is wearing a yellow jersey with the number 10 on it. The match is taking place in the 32nd round of the La Liga 2017-2018 season, as indicated by the text at the bottom of the image. The opposing team is wearing white jerseys. The image captures a moment of action, with the player in blue and red likely having just taken a shot on goal or is preparing to defend. 
The crowd in the background is blurred, focusing the viewer's attention on the player and the immediate action around the goal.", + "tokens_per_second": 43.540681114859616, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 150, + "inference_time": 24.191356897354126, + "cpu_usage": 43.9, + "cpu_core_utilization": [ + 47.8, + 48.5, + 45.2, + 33.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_063.mp4": { + "response": "The image you've shared is a screenshot from a video of a soccer match. The match is the 30th Champions League game, which took place on June 4th, 2010, between Arsenal and Manchester United. The player featured in the image is Lionel Messi, who is wearing a Barcelona jersey with the number 10. He appears to be in the middle of the field, possibly dribbling or passing the ball. The background shows other players from both teams, as well as the goalposts. The image has a slightly blurry quality, which is common in video footage.", + "tokens_per_second": 37.796216146669394, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 126, + "inference_time": 23.913387775421143, + "cpu_usage": 53.0, + "cpu_core_utilization": [ + 73.7, + 42.0, + 52.8, + 43.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_064.mp4": { + "response": "The image shows a soccer match between La Liga team Real Madrid and Getafe against the team Iran. The match took place on June 2, 2010, and was part of the 28th World Cup. The scoreboard indicates that Real Madrid won the match with a score of 3-0. The image captures a moment during the game, with players on the field and spectators in the stands. 
The atmosphere appears to be intense and competitive, as is typical in a high-stakes soccer match.", + "tokens_per_second": 43.727275245614855, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 105, + "inference_time": 25.784566164016724, + "cpu_usage": 52.1, + "cpu_core_utilization": [ + 57.7, + 38.9, + 74.6, + 37.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_065.mp4": { + "response": "The image shows a screenshot from a soccer match between Iran and Argentina during the 28th round of the 2014 FIFA World Cup. The match took place on June 2, 2014, and was played at the Wanda Metropolitano stadium in Madrid, Spain. The score at the time of the screenshot was 0-0, with both teams having one player each. The image captures a moment of celebration for the Argentine player, who is seen with his arms outstretched in a victorious pose. The screenshot also includes the match details and the teams playing, as well as the location and date of the match.", + "tokens_per_second": 38.956429242334835, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 133, + "inference_time": 24.38482093811035, + "cpu_usage": 69.1, + "cpu_core_utilization": [ + 65.4, + 61.1, + 64.1, + 85.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_066.mp4": { + "response": "The image shows a soccer match between two teams, one wearing blue and white striped jerseys and the other in yellow jerseys. The match is taking place in a stadium filled with spectators. The players are actively engaged in the game, with one player in a yellow jersey kicking the ball. The scoreboard in the background displays the match details, indicating that this is a professional-level soccer match. 
The image captures the intensity and excitement of the game, with the players focused on their performance and the spectators cheering them on.", + "tokens_per_second": 43.58446168324715, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 113, + "inference_time": 25.848461389541626, + "cpu_usage": 71.0, + "cpu_core_utilization": [ + 87.1, + 64.6, + 66.0, + 66.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_067.mp4": { + "response": "The image shows a soccer match between Real Madrid and Barcelona, with the score displayed as 26-26. The match took place on December 26, 2012, and was part of the 26th round of the La Liga competition. The image appears to be a screenshot from a broadcast or a video recording of the game.", + "tokens_per_second": 43.65934498479588, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 70, + "inference_time": 21.308406114578247, + "cpu_usage": 56.9, + "cpu_core_utilization": [ + 47.3, + 76.3, + 47.3, + 56.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_068.mp4": { + "response": "The image shows a soccer match between Real Madrid and Barcelona, with the score currently tied at 0-0. The match is taking place on December 26, 2012, and it's a match between Real Madrid and Barcelona. The image is a screenshot from a broadcast, showing the players on the field in action.", + "tokens_per_second": 43.06015335799167, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 68, + "inference_time": 21.764374494552612, + "cpu_usage": 47.5, + "cpu_core_utilization": [ + 37.8, + 72.4, + 30.5, + 49.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_069.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Malaga, with the score currently at 0-0. The match is taking place in the 25th minute of the game. The players are on the field, with one team wearing blue and red striped jerseys and the other team wearing white jerseys. 
The image appears to be a still from a broadcast or a video recording of the match.", + "tokens_per_second": 43.220403962218256, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 86, + "inference_time": 23.011720895767212, + "cpu_usage": 47.0, + "cpu_core_utilization": [ + 33.4, + 43.6, + 31.7, + 79.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_070.mp4": { + "response": "This image is a screenshot from a video game, specifically a soccer match. The scene shows a player in a blue and red uniform, likely representing the team Unicaja, about to kick the ball. The match is taking place between Malaga and Celta de Vigo, as indicated by the text at the bottom of the screen. The game appears to be a simulation of a real-life match, with detailed graphics and a realistic soccer field. The player's number, 10, is visible on the back of his jersey. The overall atmosphere of the image suggests an intense moment in the game, with the player focused on making a crucial play.", + "tokens_per_second": 43.39715561820535, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 134, + "inference_time": 24.004247188568115, + "cpu_usage": 45.2, + "cpu_core_utilization": [ + 36.5, + 33.6, + 84.4, + 26.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_071.mp4": { + "response": "This image shows a screenshot from a soccer match between Barcelona and Celta de Vigo, which took place on April 3, 2017. The match was part of the 23rd La Liga season. The screenshot captures a moment during the game, with players from both teams on the field. The image is in a widescreen format, with a black border at the top and bottom. There is a watermark in the top right corner that reads \"WOUVA WOUVA\". 
The image appears to be a still from a video or a broadcast, rather than a live feed.", + "tokens_per_second": 43.466487817577274, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 124, + "inference_time": 22.42779278755188, + "cpu_usage": 45.6, + "cpu_core_utilization": [ + 38.0, + 48.8, + 35.4, + 60.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_072.mp4": { + "response": "The image shows a screenshot from a soccer match between Bosnia and Herzegovina and Argentina, which took place on June 16, 2014. The match was part of the 22nd World Cup, and it was played at the Maracana Stadium in Rio de Janeiro, Brazil. The scoreboard in the image indicates that Argentina won the match with a score of 1-0. The player who scored the goal is not identified in the image. The image also shows the scoreboard at the top, with the score being 1-0 in favor of Argentina. The image is in a widescreen format, with a black border around it.", + "tokens_per_second": 43.3955833964203, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 135, + "inference_time": 22.810667514801025, + "cpu_usage": 43.0, + "cpu_core_utilization": [ + 47.9, + 48.6, + 53.5, + 22.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_073.mp4": { + "response": "The image shows a soccer match between two teams, one wearing blue and red striped jerseys and the other in white jerseys. The match is taking place on a green field with white lines marking the boundaries. The players are actively engaged in the game, with one player in blue and red striped jersey running towards the ball, while the players in white jerseys are positioned around the ball. The image appears to be a screenshot from a broadcast or a video, as there is a timestamp and a logo visible in the top right corner. 
The match is part of the Copa America 2007, as indicated by the text at the bottom of the image.", + "tokens_per_second": 43.392131145686385, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 139, + "inference_time": 23.083038330078125, + "cpu_usage": 43.5, + "cpu_core_utilization": [ + 26.6, + 21.1, + 29.5, + 96.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_074.mp4": { + "response": "The image shows a soccer match between Mexico and another team, likely the United States, taking place on July 12, 2012. The match is part of the Champions League, a prestigious international soccer tournament. The players are seen running on the field, with one player in a blue and white striped jersey, possibly representing the United States team, while the other players are in yellow jerseys, likely representing the Mexican team. The scoreboard at the bottom of the image indicates that the match is between Mexico and the United States, with the score currently tied at 0-0. The image captures the intensity and excitement of the match, with the players fully engaged in the game.", + "tokens_per_second": 43.3240962507516, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 142, + "inference_time": 23.30271887779236, + "cpu_usage": 43.9, + "cpu_core_utilization": [ + 22.3, + 28.2, + 98.5, + 26.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_075.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Bayern Munich. The match is part of the Champions League, specifically the 18th round, and it's taking place on March 7, 2010. The scoreboard indicates that FC Barcelona is leading with a score of 2-0. The players are actively engaged in the game, with one player from FC Barcelona having just scored a goal. 
The image captures the intensity and excitement of the match, with the players focused on the ball and their opponents.", + "tokens_per_second": 42.86824700295901, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 105, + "inference_time": 22.129015922546387, + "cpu_usage": 55.2, + "cpu_core_utilization": [ + 43.2, + 53.7, + 44.1, + 79.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_076.mp4": { + "response": "The image shows a soccer match between Manchester United and AC Milan. The match is part of the Champions League, specifically the 16th round, and took place on March 12, 2013. The players are on the field, with one team wearing white and the other team wearing purple and black striped jerseys. The scoreboard shows that Manchester United is leading with 17 points, while AC Milan has 6 points. The image captures a moment of action, with a player from AC Milan attempting to score a goal. The players are wearing their respective team uniforms, with Manchester United in white and AC Milan in purple and black stripes. The image is a screenshot from a broadcast of the match, with the score and team information displayed at the bottom of the screen.", + "tokens_per_second": 43.49618548005215, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 160, + "inference_time": 23.40229344367981, + "cpu_usage": 42.5, + "cpu_core_utilization": [ + 21.7, + 50.0, + 55.7, + 42.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_077.mp4": { + "response": "The image shows a soccer match between AC Milan and Sevilla, taking place on December 3, 2012. The match is part of the Champions League, and it's being played at the Wanda Metropolitano stadium in Madrid, Spain. The scoreboard indicates that the match is in the 16th minute, with AC Milan leading 1-0. 
The player in the blue and red striped jersey, who is AC Milan's goalkeeper, is seen in the foreground, likely in the process of making a save or preparing to throw the ball back into play. The background shows the green pitch of the field and the stands with spectators. The image captures the intensity and excitement of a high-stakes Champions League match.", + "tokens_per_second": 43.25714331377744, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 151, + "inference_time": 23.83241081237793, + "cpu_usage": 44.5, + "cpu_core_utilization": [ + 62.2, + 35.9, + 52.0, + 28.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_078.mp4": { + "response": "The image shows a soccer match between Barcelona and Arsenal, with the final score being 2-1 in favor of Barcelona. The match took place on April 6, 2010, and was part of the Champions League. The image appears to be a screenshot from a broadcast or a video recording of the game. The scoreboard at the bottom of the screen indicates that the final score was 2-1, with Barcelona winning the match. The image also includes some text at the bottom that reads \"14 Champions League 06/04/2010 vs Arsenal\", which confirms the date and the teams playing.", + "tokens_per_second": 43.85060391397791, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 126, + "inference_time": 23.163884162902832, + "cpu_usage": 50.2, + "cpu_core_utilization": [ + 55.2, + 37.0, + 71.3, + 37.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_079.mp4": { + "response": "The image shows a soccer match between two teams, one wearing blue and red striped jerseys and the other in white jerseys. The match is taking place on a green field with white lines marking the boundaries and center circle. The players are spread out across the field, with some closer to the goal area and others further away. 
The scoreboard at the bottom of the image shows that the team in blue and red striped jerseys is leading with a score of 2-0. The image appears to be a screenshot from a television broadcast of the match.", + "tokens_per_second": 43.467235033644464, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 119, + "inference_time": 23.673866987228394, + "cpu_usage": 47.1, + "cpu_core_utilization": [ + 33.4, + 42.8, + 32.2, + 80.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_080.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Valencia, taking place on May 14, 2010. The match was part of the La Liga competition, and it was a 2-1 victory for Valencia over Barcelona. The image captures a moment during the game, with players from both teams on the field. One of the players from Valencia, wearing a blue and red striped jersey, is seen running towards the ball. The image has a slightly blurry quality, which adds to the dynamic nature of the scene. The match was broadcast on the WOWA TV channel.", + "tokens_per_second": 42.13491054305142, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 121, + "inference_time": 23.47833752632141, + "cpu_usage": 55.8, + "cpu_core_utilization": [ + 52.7, + 49.7, + 55.4, + 65.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_081.mp4": { + "response": "The image shows a soccer match between Barcelona and Arsenal, with Barcelona winning 10-0. The match took place on August 6, 2012, and was part of the Champions League. The image appears to be a screenshot from a broadcast or a video recording of the game. The scoreboard at the bottom of the screen confirms the final score and the date of the match. The image also includes some text, including \"11. Champions League 06.03.2012 vs Arsenal\" and \"10. 
International Friendly 09.05.2012 vs Arsenal\", which provides additional context about the match.", + "tokens_per_second": 43.3165118354319, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 128, + "inference_time": 25.15186381340027, + "cpu_usage": 51.4, + "cpu_core_utilization": [ + 43.1, + 59.2, + 50.8, + 52.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_082.mp4": { + "response": "The image you've shared is a screenshot from a video game, specifically a soccer match. The game appears to be set in Brazil, as indicated by the text \"10. International Friendly 09.05.2012 vs Brazil\" at the bottom of the screen. The match is between two teams, one wearing blue and white striped jerseys and the other in red and yellow striped jerseys. The scoreboard shows that the team in blue and white has scored 10 goals, while the team in red and yellow has scored 5 goals. The match is taking place on a green field with white lines marking the boundaries and center circle. The image has a slightly blurry quality, which is common in video game screenshots.", + "tokens_per_second": 41.074128731433575, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 155, + "inference_time": 23.635963201522827, + "cpu_usage": 51.6, + "cpu_core_utilization": [ + 37.1, + 50.6, + 42.3, + 76.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_083.mp4": { + "response": "The image shows a soccer match in progress, with players from two teams on the field. One team is wearing blue jerseys, while the other team is wearing red and yellow striped jerseys. The players are actively engaged in the game, with one player in the process of kicking the ball. The match appears to be taking place in a stadium, with spectators visible in the background. 
The image is likely a screenshot from a television broadcast or a video recording of the match.", + "tokens_per_second": 43.41314970326025, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 102, + "inference_time": 22.28145408630371, + "cpu_usage": 46.5, + "cpu_core_utilization": [ + 34.0, + 42.8, + 31.5, + 77.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_084.mp4": { + "response": "The image shows a soccer player in action on the field. The player is wearing a blue and purple jersey with the number 10 on it. He appears to be in the middle of a game, possibly celebrating a goal or making a play. The background shows a crowd of spectators and a large advertisement board. The image is from a Champions League match between Liverpool and Barcelona, which took place on May 1, 2019.", + "tokens_per_second": 43.62598508461413, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 90, + "inference_time": 22.25613021850586, + "cpu_usage": 46.9, + "cpu_core_utilization": [ + 81.5, + 39.0, + 38.0, + 29.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_085.mp4": { + "response": "The image shows a soccer match between Liverpool and Athletic Club. The player from Liverpool, wearing the number 10 jersey, is in possession of the ball and appears to be dribbling towards the goal. The goalkeeper from Athletic Club is in a diving position, attempting to block the shot. The match is taking place in the Champions League, as indicated by the text at the bottom of the image. 
The scoreboard shows that Liverpool is currently leading 1-0.", + "tokens_per_second": 42.46289527866258, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 97, + "inference_time": 22.803260803222656, + "cpu_usage": 51.1, + "cpu_core_utilization": [ + 42.8, + 65.0, + 52.8, + 43.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_086.mp4": { + "response": "The image shows a soccer match between Real Betis and Barcelona, with the score currently tied at 0-0. The match is taking place on April 27, 2013, and it's the 7th round of the La Liga competition. The players are on the field, with one team wearing blue and red striped jerseys and the other team wearing white jerseys. The image is a screenshot from a broadcast, with the text \"La Liga 27/04/2013 vs Athletic Club\" visible at the bottom.", + "tokens_per_second": 37.8510224115901, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 114, + "inference_time": 23.451311826705933, + "cpu_usage": 58.0, + "cpu_core_utilization": [ + 55.1, + 48.7, + 80.1, + 48.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_087.mp4": { + "response": "The image shows a soccer match between Bayern Munich and FC Barcelona, a well-known rivalry in the world of football. The match is part of the Champions League, a prestigious international club competition. The scoreboard indicates that FC Barcelona is leading 3-0 at the time the image was captured. The players are in action on the field, with one player from FC Barcelona controlling the ball. 
The image captures the intensity and excitement of the match, with the crowd likely cheering for their team.", + "tokens_per_second": 43.779372271828834, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 101, + "inference_time": 23.15221858024597, + "cpu_usage": 46.2, + "cpu_core_utilization": [ + 80.2, + 32.2, + 43.0, + 29.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_088.mp4": { + "response": "The image shows a soccer match between Barcelona and Zaragoza, with the match taking place on March 21, 2010. The scoreboard indicates that Barcelona is leading 4-1 at this point in the game. The players are on the field, with some players from both teams visible in the image. The scoreboard also displays the match number, which is 4. The image appears to be a screenshot from a broadcast or a video recording of the match.", + "tokens_per_second": 43.907938544856734, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 96, + "inference_time": 22.601807355880737, + "cpu_usage": 45.6, + "cpu_core_utilization": [ + 60.0, + 28.6, + 61.1, + 32.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_089.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Athletic Club. The match took place on May 30, 2015, and was part of the 3rd Copa del Rey 2015-2016. The score at the time was 3-0 in favor of FC Barcelona. The players on the field are wearing their respective team colors, with FC Barcelona in blue and purple striped jerseys, and Athletic Club in white jerseys with black stripes. 
The image captures a moment of celebration for FC Barcelona, with players hugging and celebrating on the field.", + "tokens_per_second": 42.785801642658335, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 119, + "inference_time": 22.843401193618774, + "cpu_usage": 45.7, + "cpu_core_utilization": [ + 34.4, + 47.3, + 59.1, + 42.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_090.mp4": { + "response": "The image shows a soccer match between Real Madrid and another team, likely Barcelona, taking place on April 27, 2014. The match is part of the Champions League, as indicated by the text at the bottom of the image. The players are on a green field, with one team wearing white jerseys and the other team wearing red and blue striped jerseys. The scoreboard at the bottom of the image shows that Real Madrid is leading with a score of 2-0. The image appears to be a screenshot from a broadcast or a video recording of the match.", + "tokens_per_second": 43.70943500244427, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 122, + "inference_time": 23.450674772262573, + "cpu_usage": 45.4, + "cpu_core_utilization": [ + 29.0, + 72.4, + 32.0, + 48.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_091.mp4": { + "response": "This image is a screenshot from a soccer match between Real Madrid and Getafe, as indicated by the text at the bottom of the screen. The match took place on April 18, 2007, and Real Madrid won 2-0. The screenshot shows a moment during the game where a player from Real Madrid is attempting to score a goal, while a player from Getafe is trying to block or defend. 
The image is somewhat blurry, which makes it difficult to see the players' faces and expressions clearly.", + "tokens_per_second": 43.53166357065771, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 108, + "inference_time": 22.15869665145874, + "cpu_usage": 43.6, + "cpu_core_utilization": [ + 61.1, + 56.1, + 30.1, + 27.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_092.mp4": { + "response": "The image shows a video screen capture from a soccer match between Real Madrid and Barcelona. The match is taking place in the Estadio Santiago Bernab\u00e9u in Madrid, Spain. The scoreboard indicates that Real Madrid is leading with a score of 1-0 against Barcelona. The match is part of the UEFA Champions League, specifically the 1st leg of the round of 16. The video appears to be a highlight reel or a summary of the match, as it is cut off at the final moments of the game. The image is likely from a sports news website or a soccer streaming service, providing viewers with a quick overview of the match's progress.", + "tokens_per_second": 43.549125338189285, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 136, + "inference_time": 22.22693133354187, + "cpu_usage": 40.9, + "cpu_core_utilization": [ + 24.2, + 48.0, + 24.0, + 67.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_093.mp4": { + "response": "The image you've provided appears to be a screenshot of a video or a paused video. It features a simple gray background with the YouTube logo prominently displayed in the center. Below the logo, there's text that reads \"WOUVA10\" and a URL \"www.youtube.com/WOUVA10\". 
The overall design is minimalistic, with no additional elements or details visible.", + "tokens_per_second": 43.81067232421721, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 79, + "inference_time": 19.245930194854736, + "cpu_usage": 37.6, + "cpu_core_utilization": [ + 45.4, + 47.1, + 34.6, + 23.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_094.mp4": { + "response": "The image you've provided appears to be a logo or a symbol, but it's not clear what the specific content or meaning of the symbol is without additional context. The symbol consists of three vertical lines that resemble a stylized \"W\" or \"V\" shape. Below the symbol, there is text that reads \"WOUVA\".\n\nIf you have more information about the context in which this symbol is used or if you need help with something specific related to this symbol, please provide more details.", + "tokens_per_second": 43.90407130082045, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 102, + "inference_time": 19.472203493118286, + "cpu_usage": 36.5, + "cpu_core_utilization": [ + 26.0, + 45.5, + 18.2, + 56.1 + ] + } +} \ No newline at end of file diff --git a/API_Transformers/outputs/lfm/lfm_60s.json b/API_Transformers/outputs/lfm/lfm_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..6221cb40791812038b39c9af5b7dcf4b3ae9f4ac --- /dev/null +++ b/API_Transformers/outputs/lfm/lfm_60s.json @@ -0,0 +1,660 @@ +{ + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_001.mp4": { + "response": "The image shows a soccer match between Barcelona and Valencia, with Lionel Messi playing for Barcelona. Messi is wearing the number 10 jersey and is in the middle of the field, surrounded by other players from both teams. The match appears to be taking place in a stadium, with spectators visible in the background. The image is a screenshot from a video, as indicated by the \"WOUVA\" logo in the corner. 
The date on the image is 14/08/2010, which corresponds to August 14, 2010.", + "tokens_per_second": 43.63059232470235, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 116, + "inference_time": 21.40237259864807, + "cpu_usage": 41.1, + "cpu_core_utilization": [ + 21.9, + 68.3, + 27.7, + 46.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_002.mp4": { + "response": "The image shows a soccer match between Barcelona and Atletico Madrid. The player number 10, who is part of the Barcelona team, is about to score a goal. The ball is in mid-air, and the goalkeeper is trying to block it. The scoreboard at the bottom of the image shows that Barcelona is leading with a score of 1-0. The match is taking place on April 25, 2012, and it's the 97th match of the La Liga season.", + "tokens_per_second": 43.51528055992275, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 104, + "inference_time": 23.430800437927246, + "cpu_usage": 46.0, + "cpu_core_utilization": [ + 42.4, + 66.6, + 33.9, + 41.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_003.mp4": { + "response": "The image shows a soccer match between Real Madrid and Barcelona, with the score currently tied at 1-1. The match is taking place in the 96th minute of the game, as indicated by the text \"96. Club World Cup 18/12/2011 vs Santos\" at the bottom of the image. The players are on the field, with some in blue and red striped jerseys representing Real Madrid, and others in white jerseys representing Barcelona. The goalkeeper is visible in the background, and the ball is in the air, suggesting that a shot on goal has just been taken. 
The image captures the intensity and excitement of a high-stakes soccer match.", + "tokens_per_second": 43.14480825013987, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 142, + "inference_time": 23.440691709518433, + "cpu_usage": 43.9, + "cpu_core_utilization": [ + 55.3, + 58.1, + 24.9, + 37.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_004.mp4": { + "response": "The image shows a soccer match between Real Madrid and Atletico Madrid. The match took place on April 23, 2017, and was part of the 93rd La Liga season. The scoreboard indicates that Real Madrid won the match with a score of 2-1. The players on the field are wearing their respective team uniforms, with Real Madrid in blue and Atletico Madrid in white. The image captures a moment during the game, with the ball in play and players from both teams positioned around the field.", + "tokens_per_second": 43.61223216770222, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 109, + "inference_time": 22.73217225074768, + "cpu_usage": 43.6, + "cpu_core_utilization": [ + 57.3, + 25.4, + 61.0, + 30.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_005.mp4": { + "response": "The image shows a soccer match between La Liga teams La Liga and Celta de Vigo. The match took place on September 11, 2018, and was part of the 90th season of La Liga. The teams are competing on a green field, with players from both teams actively engaged in the game. The scoreboard indicates that La Liga is leading with a score of 1-0. 
The image captures the intensity and excitement of the match, with players running, kicking the ball, and competing for possession.", + "tokens_per_second": 43.19587492692884, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 109, + "inference_time": 22.679114818572998, + "cpu_usage": 44.7, + "cpu_core_utilization": [ + 28.8, + 74.9, + 47.1, + 28.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_006.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Sporting Gijon. The match took place on February 17, 2015, and ended with a score of 1-0 in favor of Sporting Gijon. The image captures a moment during the game, with a player from FC Barcelona in possession of the ball. The scoreboard in the image shows the final score and the time remaining in the match. The image also includes some text at the bottom, which reads \"89. La Liga 17/02/2015 vs Sporting Gijon\".", + "tokens_per_second": 43.57761765477471, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 116, + "inference_time": 23.977370262145996, + "cpu_usage": 46.4, + "cpu_core_utilization": [ + 30.9, + 30.5, + 91.3, + 32.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_007.mp4": { + "response": "The image shows a soccer match between Real Betis and Villarreal, with the score displayed as 87-87. The match took place on September 12, 2012, and was part of the 85th Champions League game. The players are on the field, with one team wearing blue and purple uniforms and the other team wearing white uniforms. 
The image appears to be a screenshot from a broadcast or a video, as there are watermarks and text overlays indicating the match details and the broadcasting company.", + "tokens_per_second": 43.63718223089107, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 107, + "inference_time": 22.929774284362793, + "cpu_usage": 45.5, + "cpu_core_utilization": [ + 56.0, + 51.6, + 37.2, + 37.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_008.mp4": { + "response": "The image shows a soccer match between Barcelona and Real Valladolid, with the score currently at 0-0. The match is taking place in the 82nd minute of play. The players are on the field, with some in blue and white striped jerseys representing Barcelona, and others in red and yellow jerseys representing Real Valladolid. The ball is in play, and the players are positioned around it. The image captures a moment of intense competition between the two teams.", + "tokens_per_second": 43.53603194535748, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 102, + "inference_time": 22.856961727142334, + "cpu_usage": 44.8, + "cpu_core_utilization": [ + 42.3, + 28.4, + 78.7, + 29.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_009.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Malaga, with the score currently at 1-1. The match is taking place on October 25, 2013, and it's the 82nd minute of play. The players are on the field, with some in blue and purple jerseys representing FC Barcelona and others in white jerseys representing Malaga. The ball is in play, and the players are actively engaged in the game. 
The image captures the intensity and excitement of a close soccer match.", + "tokens_per_second": 43.67786409828369, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 108, + "inference_time": 22.94000744819641, + "cpu_usage": 45.0, + "cpu_core_utilization": [ + 71.8, + 28.1, + 41.7, + 38.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_010.mp4": { + "response": "The image shows a soccer match between Real Betis and Barcelona, with the score currently at 0-0. The match is taking place in the 79th minute of play on the 17th of March, 2019. The players are on the field, with some in blue jerseys and others in red. The crowd in the stands is visible in the background. The image appears to be a screenshot from a video or broadcast of the match.", + "tokens_per_second": 43.654193436473435, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 96, + "inference_time": 22.56173825263977, + "cpu_usage": 44.2, + "cpu_core_utilization": [ + 27.0, + 53.1, + 58.2, + 38.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_011.mp4": { + "response": "The image shows a soccer match between La Liga teams Real Madrid and Sevilla. The match took place on February 23, 2019, and was part of the 77th Club World Cup. The scoreboard indicates that Sevilla won the match with a score of 2-0. The image captures a moment during the game, with players from both teams on the field. The scoreboard also displays the names of the players, including the goalkeeper from Sevilla, who is seen diving to make a save. The image is a screenshot from a broadcast of the match, with the text \"77. 
La Liga 23.02.2019 vs Sevilla\" visible at the bottom.", + "tokens_per_second": 42.25990172456639, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 140, + "inference_time": 23.764614582061768, + "cpu_usage": 47.5, + "cpu_core_utilization": [ + 47.9, + 37.2, + 72.1, + 32.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_012.mp4": { + "response": "The image shows a soccer match between Real Madrid and Olympique Lyonnais, which took place on March 24, 2019. The match was part of the 75th Champions League, and it was a game between the two teams. The score at the time of the image was 2-2, with both teams having scored two goals each. The image captures a moment during the match, with players from both teams on the field. The scoreboard at the bottom of the image shows the score as 2-2, and the date as March 24, 2019. The image also includes some text, including \"75. Champions League 13.03.2019: vs Olympique Lyonnais\" and \"75. La Liga 24.02.2008: vs Levante\", which may be related to the context or background of the image.", + "tokens_per_second": 42.53959316212018, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 181, + "inference_time": 25.424269676208496, + "cpu_usage": 58.0, + "cpu_core_utilization": [ + 49.7, + 50.9, + 62.3, + 69.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_013.mp4": { + "response": "The image shows a soccer match between Lazio and Atletico Madrid. The match took place on November 3, 2008, and was part of the 72nd La Liga season. The players are seen on the field, with some wearing yellow jerseys and others in blue. The scoreboard indicates that Lazio won the match with a score of 2-1. 
The image captures the excitement and intensity of the game, with players running and competing for the ball.", + "tokens_per_second": 43.737237712906385, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 100, + "inference_time": 22.702731609344482, + "cpu_usage": 45.1, + "cpu_core_utilization": [ + 65.1, + 42.1, + 39.7, + 33.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_014.mp4": { + "response": "The image shows a soccer match between two teams, one wearing blue and red striped jerseys and the other in white jerseys. The match is taking place on a green field with white lines marking the boundaries and center circle. The players are actively engaged in the game, with one player in possession of the ball and others positioned around him. The scoreboard at the bottom of the image indicates that the match is between \"Laiga 08/2012\" and \"Malaga,\" suggesting that this is a Spanish league match. The image captures the intensity and excitement of the game, with players running, kicking, and strategizing to gain control of the ball.", + "tokens_per_second": 43.33501727893497, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 138, + "inference_time": 23.13905143737793, + "cpu_usage": 43.8, + "cpu_core_utilization": [ + 66.1, + 47.0, + 32.0, + 30.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_015.mp4": { + "response": "The image shows a soccer match between La Liga teams Real Madrid and Rayo Vallecano. The match took place on August 4, 2015, and ended with Real Madrid winning 2-0. The player who scored the winning goal is not visible in the image. 
The scoreboard at the bottom of the image shows the score as 2-0 in favor of Real Madrid.", + "tokens_per_second": 43.71047297473334, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 80, + "inference_time": 22.56661105155945, + "cpu_usage": 47.0, + "cpu_core_utilization": [ + 32.8, + 31.1, + 61.7, + 62.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_016.mp4": { + "response": "The image shows a soccer match between Real Madrid and Barcelona, with the score currently tied at 0-0. The match is taking place in the 64th minute of play. The players are on the field, with one team wearing blue and white striped jerseys and the other team wearing white jerseys. The ball is in the air, and it's unclear who will be the first to take possession. The image captures the intensity and excitement of a close soccer match.", + "tokens_per_second": 43.54078657986269, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 100, + "inference_time": 22.969728469848633, + "cpu_usage": 45.4, + "cpu_core_utilization": [ + 32.7, + 73.2, + 36.0, + 39.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_017.mp4": { + "response": "The image shows a soccer match between two teams, one wearing blue and red striped jerseys and the other in white jerseys. The match is taking place in a stadium with a large crowd of spectators in the background. The scoreboard indicates that the team in blue and red striped jerseys is currently leading the match with a score of 2-0. 
The image appears to be a screenshot from a television broadcast of the match.", + "tokens_per_second": 43.53893919238898, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 95, + "inference_time": 22.690056324005127, + "cpu_usage": 45.0, + "cpu_core_utilization": [ + 45.9, + 29.2, + 74.2, + 30.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_018.mp4": { + "response": "The image shows a soccer match between Real Madrid and Almeria, taking place on 28th September 2013. The match is part of the La Liga competition, as indicated by the text \"La Liga 28/09/2013 vs Almeria\" displayed at the bottom of the image. The players are seen on the field, with one player from Real Madrid wearing a blue and red striped jersey, while the other player is from Almeria, wearing a white jersey. The image appears to be a screenshot from a broadcast or a video, as there is a watermark in the top right corner that reads \"WOUVA WWW\". The overall scene captures the intensity and excitement of a professional soccer match.", + "tokens_per_second": 42.77804507220416, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 153, + "inference_time": 23.719878673553467, + "cpu_usage": 45.6, + "cpu_core_utilization": [ + 33.4, + 46.8, + 28.9, + 73.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_019.mp4": { + "response": "The image shows a soccer match between Real Madrid and Alaves, with Real Madrid winning 4-0. The match took place on July 12, 2014, and the final score was 4-0 in favor of Real Madrid. The image captures a moment during the game where Real Madrid's goalkeeper is making a save to prevent a goal from being scored. 
The scoreboard in the image shows the final score of 4-0 in favor of Real Madrid.", + "tokens_per_second": 40.6841438932034, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 98, + "inference_time": 23.04579997062683, + "cpu_usage": 54.3, + "cpu_core_utilization": [ + 46.2, + 41.7, + 86.8, + 42.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_020.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Alcoris. The scoreboard indicates that FC Barcelona is leading 2-0. The match took place on February 12, 2019, and was part of the 56th Champions League round. The players on the field are wearing their respective team jerseys, with Barcelona's team in blue and red striped jerseys and Alcoris in green jerseys. The image captures a moment of action on the field, with players from both teams engaged in the game.", + "tokens_per_second": 43.20499224445443, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 112, + "inference_time": 23.11622142791748, + "cpu_usage": 49.5, + "cpu_core_utilization": [ + 42.4, + 46.3, + 74.6, + 34.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_021.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Atletico Madrid. The scoreboard indicates that Barcelona is leading 2-1 at the time of the screenshot. The player in the blue and red striped jersey, who is likely Lionel Messi, is seen celebrating with his teammates. The match is taking place in a large stadium filled with spectators. 
The image captures the excitement and intensity of the game, with players and fans alike reacting to the score.", + "tokens_per_second": 43.746940726148665, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 100, + "inference_time": 22.603293895721436, + "cpu_usage": 43.7, + "cpu_core_utilization": [ + 31.5, + 63.2, + 28.2, + 51.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_022.mp4": { + "response": "The image shows a soccer match between Real Madrid and Osasuna, with the score currently at 1-1. The match is taking place in the 53rd minute of the game. The players are on the field, with one team wearing blue and white striped jerseys and the other team wearing purple and blue striped jerseys. The ball is in play, and the players are actively competing for possession. The image captures the intensity and excitement of a close soccer match.", + "tokens_per_second": 43.45687531076368, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 101, + "inference_time": 22.237409830093384, + "cpu_usage": 43.8, + "cpu_core_utilization": [ + 31.2, + 36.9, + 42.2, + 64.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_023.mp4": { + "response": "The image shows a soccer match between La Liga teams La Coru\u00f1a and Sevilla. The match took place on October 30, 2013. The scoreboard indicates that La Coru\u00f1a won the match with a score of 1-0. The player who scored the goal is not clearly visible in the image. The image captures the moment when the ball is being kicked towards the goal, with the goalkeeper and other players in the background. 
The scoreboard also displays the match number, which is 49.", + "tokens_per_second": 43.49020834357343, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 104, + "inference_time": 22.038825035095215, + "cpu_usage": 43.7, + "cpu_core_utilization": [ + 70.2, + 24.4, + 48.9, + 31.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_024.mp4": { + "response": "The image shows a soccer match between Real Madrid and Atletico Madrid. The match took place on February 6, 2012, and was part of the Spanish Super Cup. The score at the time was 0-0, with both teams having one player each. The image captures a moment of celebration for Real Madrid, with players hugging and cheering. The players are wearing their respective team jerseys, with Real Madrid in green and Atletico Madrid in white. The match was played on a green field with white lines marking the boundaries and center circle. The image is slightly blurry, but the excitement and energy of the moment are palpable.", + "tokens_per_second": 43.259381943491896, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 137, + "inference_time": 23.62865400314331, + "cpu_usage": 44.7, + "cpu_core_utilization": [ + 63.8, + 43.0, + 41.4, + 30.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_025.mp4": { + "response": "The image shows a soccer match between two teams, one wearing blue and white striped jerseys and the other in yellow jerseys. The match appears to be taking place in a stadium with a green field and spectators in the stands. The scoreboard at the bottom of the image indicates that the match is between \"La Liga\" and \"Deportivo La Conruana\" on 17/01/2010. The score is currently 0-0, with 45 minutes of playtime remaining. 
The image also includes a watermark \"WOUVA\" in the top right corner.", + "tokens_per_second": 42.31568205191284, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 125, + "inference_time": 23.539642333984375, + "cpu_usage": 47.6, + "cpu_core_utilization": [ + 43.2, + 34.2, + 80.0, + 33.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_026.mp4": { + "response": "The image shows a soccer match between La Liga teams Real Madrid and Atletico Madrid. The match took place on January 12, 2019, and was part of the 43rd Champions League. The scoreboard indicates that Real Madrid won the match 2-0. The players on the field are wearing their respective team colors, with Real Madrid in yellow and Atletico Madrid in red and white striped jerseys. The image captures a moment of action, with players running and competing for the ball.", + "tokens_per_second": 43.71294869542146, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 107, + "inference_time": 23.50540328025818, + "cpu_usage": 57.7, + "cpu_core_utilization": [ + 47.3, + 49.4, + 90.4, + 43.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_027.mp4": { + "response": "The image shows a soccer match between Real Madrid and Atletico Madrid. The match took place on July 12, 2019, and was part of the La Liga competition. The score at the time of the image was 42-42, indicating a tied game. The players on the field are wearing their respective team uniforms, with Real Madrid in blue and Atletico Madrid in white. The image captures a moment of action, with one player from Atletico Madrid attempting to score a goal against a Real Madrid defender. 
The scoreboard in the image shows the current score and the time remaining in the match.", + "tokens_per_second": 43.47154398494139, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 127, + "inference_time": 23.50995969772339, + "cpu_usage": 54.7, + "cpu_core_utilization": [ + 71.9, + 45.5, + 54.5, + 46.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_028.mp4": { + "response": "The image shows a soccer match between Real Madrid and Lyon, with the score currently at 1-1. The match is taking place in Lyon's stadium, and the crowd is visible in the background. The players are on the field, with one team wearing blue and red striped jerseys, and the other team wearing white jerseys. The image appears to be a screenshot from a broadcast or a video recording of the match.", + "tokens_per_second": 43.76276301342597, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 91, + "inference_time": 22.579952239990234, + "cpu_usage": 47.9, + "cpu_core_utilization": [ + 38.6, + 39.2, + 72.7, + 41.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_029.mp4": { + "response": "The image shows a soccer match between the United States and the United States of America, with the score currently at 0-0. The match is taking place on April 17, 2016, and is part of the Copa America tournament. The players are wearing their respective team uniforms, with the United States team in white and blue, and the United States of America team in blue and white. The image captures a moment of action on the field, with one player in the foreground and another in the background. 
The scoreboard at the bottom of the image shows the current score and the date and location of the match.", + "tokens_per_second": 43.4272358854674, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 127, + "inference_time": 23.11515736579895, + "cpu_usage": 44.1, + "cpu_core_utilization": [ + 26.9, + 28.3, + 93.0, + 28.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_030.mp4": { + "response": "The image shows a soccer match between Argentina and Nigeria during the 2010 World Cup. The Argentine player, wearing jersey number 10, is in the foreground, while the Nigerian player is in the background. The match is taking place at the Wanda Metropolitano stadium in Madrid, Spain. The scoreboard indicates that Argentina is leading 2-0. The image captures the intensity and excitement of the match, with both teams competing fiercely for victory.", + "tokens_per_second": 43.72120414399684, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 96, + "inference_time": 23.719250440597534, + "cpu_usage": 47.6, + "cpu_core_utilization": [ + 33.4, + 49.4, + 32.4, + 75.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_031.mp4": { + "response": "The image shows a soccer match in progress, with a player in a blue and red striped jersey, likely the goalkeeper, positioned near the goal. The scoreboard at the bottom of the image indicates that the match is between \"La Liga\" and \"Manchester United\" on July 12, 2019. The score is currently 0-0, with the match in the 32nd minute. 
The image appears to be a screenshot from a broadcast or a video, as evidenced by the \"WOUVA\" logo in the top right corner.", + "tokens_per_second": 43.42996315564404, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 117, + "inference_time": 23.88136911392212, + "cpu_usage": 46.2, + "cpu_core_utilization": [ + 38.6, + 73.1, + 26.1, + 46.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_032.mp4": { + "response": "The image shows a soccer match between Arsenal and Barcelona, with the scoreboard displaying \"31. Champions League 06/04/2010 vs Arsenal\". The players are on the field, with one team wearing blue and red striped jerseys and the other team in white jerseys. The image appears to be a screenshot from a broadcast or a video, with the text \"WOUVA WWW\" visible in the top right corner. The image is somewhat blurry, making it difficult to see the players' faces clearly.", + "tokens_per_second": 43.38448764784339, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 111, + "inference_time": 23.332343339920044, + "cpu_usage": 46.9, + "cpu_core_utilization": [ + 29.8, + 31.7, + 94.3, + 31.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_033.mp4": { + "response": "The image is a screenshot from a soccer match between Iran and Argentina, which took place on June 21, 2014. The match was part of the 28th World Cup, and it was played in Iran. The screenshot shows a player from the Argentina team, wearing a blue and white striped jersey with the number 10, celebrating a goal. The player is surrounded by other players from both teams, who are in various states of motion, indicating an active play. The background shows a stadium filled with spectators, adding to the atmosphere of the match. 
The image captures a moment of excitement and triumph for the Argentine team.", + "tokens_per_second": 43.41332866806552, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 133, + "inference_time": 22.96073031425476, + "cpu_usage": 43.6, + "cpu_core_utilization": [ + 32.1, + 36.5, + 63.5, + 42.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_034.mp4": { + "response": "The image shows a soccer match between Real Madrid and Barcelona, with the score currently tied at 0-0. The match is taking place on December 26, 2012, and it's the 26th round of the La Liga tournament. The players are on the field, ready to kick off the game. The image captures the anticipation and excitement of the match, with both teams eager to score and take the lead.", + "tokens_per_second": 43.954712321925356, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 88, + "inference_time": 21.961885929107666, + "cpu_usage": 43.5, + "cpu_core_utilization": [ + 62.0, + 32.9, + 56.9, + 22.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_035.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Malaga, with the final score being 2-1 in favor of Barcelona. The match took place on September 25, 2007, and was part of La Liga. The players on the field are wearing their respective team colors, with Barcelona's players in blue and red striped jerseys, and Malaga's players in white jerseys. The image captures a moment during the game, with one player from each team competing for the ball near the goal. 
The crowd in the background is cheering and reacting to the play, adding to the excitement of the match.", + "tokens_per_second": 43.59995476967648, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 131, + "inference_time": 24.147971153259277, + "cpu_usage": 46.1, + "cpu_core_utilization": [ + 35.3, + 75.6, + 34.6, + 38.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_036.mp4": { + "response": "The image shows a video still from a soccer match between Barcelona and Celta de Vigo, which took place on April 3, 2017. The match was part of the 22nd World Cup, and it was played in Vigo, Spain. The video still captures a moment during the game, with players from both teams on the field. One player from Barcelona, wearing a blue and red striped jersey, is seen celebrating a goal, while a player from Celta de Vigo, dressed in a yellow jersey, is on the ground. The scoreboard in the corner of the image shows that Barcelona is leading 3-0. The video still is from a YouTube channel called WOUVA, which is likely the source of the video.", + "tokens_per_second": 43.36332420549335, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 157, + "inference_time": 23.61562156677246, + "cpu_usage": 43.5, + "cpu_core_utilization": [ + 24.5, + 43.3, + 30.1, + 76.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_037.mp4": { + "response": "The image shows a soccer match between Barcelona and Real Madrid, with Barcelona winning 3-1. The match took place on July 3, 2012, and was part of the Champions League. The players on the field are wearing their respective team jerseys, with Barcelona's players in blue and Real Madrid's players in white. The image captures a moment during the game, with one player from Real Madrid attempting to score a goal. 
The scoreboard in the image shows the final score of 3-1 in favor of Barcelona.", + "tokens_per_second": 42.54911962641917, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 111, + "inference_time": 22.72683596611023, + "cpu_usage": 46.7, + "cpu_core_utilization": [ + 36.6, + 67.4, + 38.1, + 44.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_038.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Manchester United. The match is part of the Champions League, specifically the 17th round, and took place on April 16, 2019. The final score was 2-1 in favor of Manchester United. The image captures a moment during the game where a Barcelona player is about to score a goal. The scoreboard at the bottom of the image confirms the final score and the date of the match.", + "tokens_per_second": 43.650739641437625, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 95, + "inference_time": 21.57595181465149, + "cpu_usage": 43.1, + "cpu_core_utilization": [ + 30.0, + 28.1, + 88.6, + 25.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_039.mp4": { + "response": "The image shows a soccer match between FC Barcelona and AC Milan during the 16th round of the Champions League on March 12, 2013. The match was played at the Wanda Metropolitano stadium in Madrid, Spain. The scoreboard indicates that AC Milan won the match 2-1 with goals from Andrea Pirlo and Mario Balotelli. 
The image captures a moment of excitement and celebration for the Barcelona team after their victory.", + "tokens_per_second": 43.762603428053495, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 92, + "inference_time": 22.646861791610718, + "cpu_usage": 45.5, + "cpu_core_utilization": [ + 33.0, + 49.0, + 26.4, + 73.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_040.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Valencia, with the score currently tied at 0-0. The match is taking place on May 14, 2010, and it's the 13th round of the La Liga competition. The players are on the field, with one team wearing blue and red striped jerseys and the other team wearing white jerseys. The image is a screenshot from a broadcast, with the text \"13. La Liga 14.05.2010 vs Valencia\" displayed at the bottom.", + "tokens_per_second": 43.78116935254593, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 114, + "inference_time": 23.798336505889893, + "cpu_usage": 46.5, + "cpu_core_utilization": [ + 34.9, + 50.4, + 61.9, + 38.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_041.mp4": { + "response": "The image shows a soccer match between Barcelona and Arsenal, with the score at 1-1. The match took place on June 5, 2012, and was part of the Champions League. The players are on the field, with one team wearing blue and white striped jerseys and the other team wearing red and yellow striped jerseys. The image is a screenshot from a broadcast, with the text \"10. 
International Friendly 09/05/2012 vs Getafe\" displayed at the bottom.", + "tokens_per_second": 43.213784548431164, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 110, + "inference_time": 22.226094722747803, + "cpu_usage": 43.9, + "cpu_core_utilization": [ + 26.1, + 34.8, + 40.3, + 74.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_042.mp4": { + "response": "The image shows a soccer match between two teams, one wearing blue jerseys and the other wearing red and yellow striped jerseys. The match is taking place on a green field with white lines marking the boundaries. The players are actively engaged in the game, with some running and others in various positions on the field. The scoreboard at the bottom of the image indicates that the match is between \"Copacabal Rei\" and \"Getafe,\" with the current score being 0-0. The match is part of the \"Champions League\" and is scheduled for May 1, 2019. The image captures the intensity and excitement of a live soccer match.", + "tokens_per_second": 42.19237599556414, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 141, + "inference_time": 23.82616138458252, + "cpu_usage": 55.3, + "cpu_core_utilization": [ + 45.5, + 44.3, + 48.7, + 82.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_043.mp4": { + "response": "The image shows a soccer match between FC Barcelona and Athletic Club. The match is part of the 7th round of the La Liga 2014-2015 season, which took place on March 17, 2019. The final score of the match was 1-0 in favor of FC Barcelona. The players on the field are wearing their respective team jerseys, with one team in blue and red striped jerseys and the other team in white jerseys. 
The image captures a moment during the match, with the players actively engaged in the game.", + "tokens_per_second": 43.38041166364092, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 119, + "inference_time": 24.084861993789673, + "cpu_usage": 46.5, + "cpu_core_utilization": [ + 60.8, + 28.2, + 62.6, + 34.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_044.mp4": { + "response": "The image shows a soccer match between Bayern Munich and FC Barcelona, with the final score being 1-0 in favor of Bayern Munich. The match took place on May 21, 2015, and was part of the Champions League. The image appears to be a screenshot from a broadcast or a video recording of the game.", + "tokens_per_second": 43.951395848409305, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 69, + "inference_time": 22.809069871902466, + "cpu_usage": 47.3, + "cpu_core_utilization": [ + 63.5, + 34.4, + 59.6, + 31.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_045.mp4": { + "response": "The image shows a soccer match between Real Madrid and Barcelona, with the score currently tied at 0-0. The match is taking place on April 27, 2014, and it's the 2nd Champions League match between these two teams. The players are on the field, with one team wearing white jerseys and the other team wearing purple and pink striped jerseys. The image appears to be a screenshot from a broadcast or a video, with the text \"2 Champions League 27/04/2014 vs Real Madrid\" displayed at the bottom.", + "tokens_per_second": 43.39903419831942, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 120, + "inference_time": 22.500088214874268, + "cpu_usage": 43.5, + "cpu_core_utilization": [ + 27.2, + 22.6, + 27.1, + 97.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_046.mp4": { + "response": "The image shows a soccer match between Real Madrid and Getafe, with the final score being 3-0 in favor of Real Madrid. 
The match took place on April 18, 2007, and was broadcast on the WOWA channel. The image appears to be a screenshot from a television broadcast or a sports news website, showing the scoreboard of the match.", + "tokens_per_second": 43.76197273361684, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 78, + "inference_time": 21.466525077819824, + "cpu_usage": 43.3, + "cpu_core_utilization": [ + 51.7, + 26.1, + 50.6, + 44.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_047.mp4": { + "response": "The image you've provided appears to be a screenshot of a video or a still frame from a video. The background is predominantly black, and there are two distinct elements on the screen:\n\n1. A logo in the center, which consists of a stylized \"W\" and \"V\" shape, possibly representing the name \"WOUVA10\" as indicated by the text below it. The logo is in a light color, contrasting with the black background.\n\n2. Below the logo, there is a URL: \"www.youtube.com/WOUVA10\". 
This suggests that the video or content is hosted on YouTube and is related to the account or channel named \"WOUVA10\".\n\nThe overall impression is that this is a branding or promotional image for a YouTube channel or video content related to \"WOUVA10\".", + "tokens_per_second": 43.197034762295125, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 171, + "inference_time": 21.49216103553772, + "cpu_usage": 36.9, + "cpu_core_utilization": [ + 16.8, + 20.4, + 20.7, + 89.7 + ] + } +} \ No newline at end of file diff --git a/API_Transformers/outputs/mini/mini_30s.json b/API_Transformers/outputs/mini/mini_30s.json new file mode 100644 index 0000000000000000000000000000000000000000..a12de40fcee7909c684bb239ae9dfd2de53c6f1c --- /dev/null +++ b/API_Transformers/outputs/mini/mini_30s.json @@ -0,0 +1,324 @@ +{ + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_001.mp4": { + "response": "The video showcases a series of moments from what appears to be a football match, focusing on the celebration and actions of players wearing Barcelona jerseys. Initially, there are plain black frames with text in the top right corner indicating \"WOUV\" which might suggest branding or watermarking by WOUV. The scene transitions into dynamic action shots where individuals in blue and red striped jerseys are seen celebrating, interacting, and displaying emotions typical of sports events such as joy and excitement.\n\nThroughout the sequence, various close-up shots highlight individual reactions including expressions of triumph and camaraderie among teammates. One notable moment shows an individual pointing upwards, possibly acknowledging the crowd or making a gesture related to their performance. 
Another frame captures someone raising both hands while looking up, likely expressing elation after scoring a goal or contributing significantly to the game's outcome.\n\nAs the clips progress, we see different angles capturing more interactions between players, suggesting teamwork and shared success. There is also a brief glimpse at another player in a green jersey, hinting at opposing teams present during this particular event.\n\nThe final segment returns to black screens bearing white text that reads \"TOP 100 GOALS LIONEL MESSI,\" providing context for these celebratory scenes as part of Messi's milestone goals list. This suggests the footage may have been compiled to celebrate his achievements within competitive matches.", + "tokens_per_second": 10.464318161039833, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 290, + "inference_time": 47.78537893295288, + "cpu_usage": 24.3, + "cpu_core_utilization": [ + 22.4, + 30.4, + 23.4, + 21.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_002.mp4": { + "response": "The video captures a series of soccer match highlights, focusing on critical moments where goals are scored. Initially, we see an orange team attacking the goal defended by a white team with several players in close proximity to the net. The scene quickly transitions into action as one player from the orange team attempts a shot towards the goal while being closely marked by two defenders.\n\nAs the play unfolds, the focus shifts to another part of the pitch where multiple players from both teams converge near the center circle. A significant moment occurs when an orange-clad player manages to get past his opponents and makes contact with the ball, resulting in a successful pass or shot that leads to a goal. 
This is followed by celebratory reactions among teammates who embrace each other in jubilation.\n\nContinuing through subsequent scenes, there's intense competition for possession within the penalty area, culminating in a decisive strike by an orange-shirted player whose attempt at scoring results in yet another goal. Celebrations ensue once again amongst members of the same team, highlighting the significance of this particular achievement during the match.\n\nThe narrative then moves forward to depict earlier events in the game, specifically capturing a moment before the mentioned goal was scored. Here, we observe gameplay dynamics involving passing between blue and red striped players against their opponents dressed in white. Despite efforts made by the defending team to intercept, they fail to prevent what appears to be a clear scoring opportunity presented by the offensive side.\n\nFinally, attention returns to the defensive actions of the opposing team attempting to thwart any further advances by the strikers. As the sequence progresses, it becomes evident that despite valiant efforts, the attackers maintain control over the situation until ultimately making use of available space to maneuver around the goalkeeper and score a goal. Throughout these sequences, expressions of determination, skillful maneuvers, and strategic plays contribute to shaping the outcome of matches featured in the footage provided.", + "tokens_per_second": 11.217051883159877, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 397, + "inference_time": 53.80337476730347, + "cpu_usage": 37.5, + "cpu_core_utilization": [ + 37.0, + 34.8, + 51.4, + 26.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_003.mp4": { + "response": "The video captures a sequence of events from two distinct soccer matches. Initially, it features players in action during La Liga on 14/03/2010 against Valencia. 
The focus is primarily on the goal area where a player wearing an orange jersey attempts to score while being closely guarded by opponents dressed in white jerseys with numbers visible like \"8\" and \"17\". The goalkeeper, clad in bright orange attire, dives towards his right side attempting to block the shot. The background reveals spectators watching intently behind advertising boards displaying various texts.\n\nTransitioning from this live-action scene, the next segment shifts to highlights or replays involving FC Barcelona playing against Getafe on 28/04/2015 as indicated by text overlays at the bottom of each frame. This part showcases strategic gameplay including passing among teammates, defensive maneuvers by yellow-jerseyed opponents, and moments leading up to potential scoring opportunities near the opposing team's goalpost marked by black protective netting. Throughout these sequences, there are close-ups that emphasize individual actions such as ball control and expressions of determination or joy reflecting pivotal moments within the game.", + "tokens_per_second": 10.186586825003255, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 248, + "inference_time": 42.50302219390869, + "cpu_usage": 38.7, + "cpu_core_utilization": [ + 19.7, + 39.0, + 22.6, + 73.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_004.mp4": { + "response": "The video captures various moments from a football match between Getafe and Barcelona, as well as another game involving Atletico Madrid. Initially, it shows the perspective through the goal net of a team playing against Getafe on April 28, 2015, highlighting player number 10 in action during gameplay. The focus then shifts to individual players wearing jerseys with numbers 11 and 10, likely Messi, celebrating or engaging actively within the game environment filled with spectators. 
Transitioning scenes depict the ongoing dynamic play where players are seen maneuvering near the goal area amidst an engaged crowd.\n\nAs the footage progresses, there's emphasis on close-up shots that emphasize facial expressions and reactions possibly reflecting tension or excitement related to the unfolding events on the field. These segments capture not just the physical actions but also emotional responses from both teams involved. Additionally, specific frames highlight key interactions such as ball movements towards the goalpost, indicating critical points like potential goals being scored or defended.\n\nThe narrative culminates with intense sequences showing attempts at scoring by players identified with jersey numbers 10 and 11, suggesting pivotal plays leading up to crucial moments in the matches. Overall, this compilation offers viewers a comprehensive glimpse into the intensity, strategy, and atmosphere characteristic of competitive football games.", + "tokens_per_second": 10.441963736015857, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 272, + "inference_time": 44.2023229598999, + "cpu_usage": 37.8, + "cpu_core_utilization": [ + 45.7, + 33.2, + 52.3, + 20.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_005.mp4": { + "response": "The video showcases various moments from a soccer match, focusing on the dynamic actions and interactions between players. It begins with an intense moment where a player in yellow attempts to score against a goalkeeper dressed in black, set against a backdrop of a blue net. The scene transitions to display text indicating \"97. LaLiga 26/02/2012 vs Atletico Madrid,\" suggesting this is part of a league game involving these teams.\n\nAs the action unfolds, we see multiple scenes capturing different aspects of gameplay. Players are seen dribbling, passing, and strategizing within the confines of the field marked by white lines. 
Advertisements for brands like Sony and 3D can be spotted along the sidelines, adding context to the professional setting of the match. Text overlays such as \"96. Club World Cup 18/12/2011 vs Santos\" appear throughout the frames, likely denoting specific matches or events within the broader tournament being covered.\n\nNotable moments include shots taken at goal by players wearing red and blue jerseys amidst opposition from defenders in white uniforms. These sequences highlight the competitive nature of the sport, showcasing both offensive plays aimed at scoring and defensive efforts to prevent goals. There's also emphasis on individual skills through close-ups of players controlling the ball and executing precise movements under pressure from opponents.\n\nThe final segments depict celebrations among team members, signifying successful plays or victories over adversaries. This joyful aftermath underscores the emotional highs experienced during sports competitions, reflecting camaraderie and achievement among teammates after contributing to their collective success on the pitch. Overall, the video captures the essence of soccer\u2014a blend of athletic prowess, strategic planning, and the exhilarating spirit of competition.", + "tokens_per_second": 11.117427002797763, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 375, + "inference_time": 52.02060675621033, + "cpu_usage": 37.7, + "cpu_core_utilization": [ + 22.2, + 47.8, + 42.5, + 38.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_006.mp4": { + "response": "The video captures a sequence of events during a football match, focusing on key moments and player interactions. It begins with players from two teams positioned near the goal area, preparing for an offensive play. A player in blue is seen making contact with the ball while being closely marked by opponents dressed in white. 
The scene progresses to show another attempt at scoring as the same player in blue maneuvers past defenders, aiming towards the goal.\n\nAs the action unfolds, the focus shifts to highlight a particular moment where a player wearing number 10 from the team in blue successfully scores a goal against the goalkeeper dressed in yellow. This event is followed by close-up shots emphasizing the celebration of this achievement among the scorer and his teammates, suggesting a pivotal point or victory in the game.\n\nSubsequently, attention moves to different parts of the pitch where other players are actively engaged in gameplay. Players in red and purple engage in strategic positioning and movement across the field, indicating ongoing attempts to either defend their position or create opportunities for advancement. One segment features intense competition for possession near the opposing team's half, culminating in what appears to be a successful attack resulting in a goal.\n\nThroughout these sequences, the crowd in the background adds to the atmosphere of excitement typical of live sports events. Text overlays provide context such as \"94. Champions League 24/11/2015 Vs AS Roma,\" situating the footage within specific timeframes and matches involving notable clubs like Barcelona and AS Roma.", + "tokens_per_second": 10.739343508452546, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 318, + "inference_time": 47.87968993186951, + "cpu_usage": 49.1, + "cpu_core_utilization": [ + 40.6, + 44.5, + 64.1, + 47.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_007.mp4": { + "response": "The video captures a sequence of events during a football match, highlighting the intense and strategic nature of the sport. It begins with players from two opposing teams positioned on the field, ready for action. As the game progresses, one team advances towards the opponent's goal area, showcasing their offensive play. 
The tension builds as the attacking player makes contact with the ball, attempting to maneuver past defenders.\n\nThe scene then shifts focus to a critical moment near the goalpost where an attack is underway against defensive efforts. A player in white attempts to score while being closely guarded by opponents dressed in red and blue stripes. The goalkeeper, clad in yellow, prepares to make a save but ultimately fails, allowing the ball to enter the net. This successful shot results in a celebration among the scoring team members who are visibly elated by the achievement.\n\nThroughout these moments, the background consistently features elements typical of a professional football stadium such as advertising boards along the sidelines and spectators filling the stands. Text overlays provide context about the event, specifying it as part of the \"Champions League\" dated \"24/11/2015\" versus \"AS Roma,\" indicating that this footage was captured during a significant European competition match between these two clubs.", + "tokens_per_second": 10.35898493058187, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 263, + "inference_time": 47.03187942504883, + "cpu_usage": 45.1, + "cpu_core_utilization": [ + 40.8, + 41.1, + 35.0, + 63.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_008.mp4": { + "response": "The video depicts a sequence from a soccer match between two teams, one in blue and the other in white. It begins with an intense scene where the goalkeeper, dressed in green, is diving to his right attempting to save a shot on goal by players wearing white jerseys. The ball appears to be heading towards the top corner of the net despite the efforts of the goalkeeper. As the action unfolds, we see various angles capturing the moment of impact as the ball crosses the line, indicating that a goal has been scored. Following this pivotal event, the focus shifts to celebrating moments among the players. 
A player in blue, likely part of the scoring team, engages in jubilant gestures such as raising his arms and holding up what seems to be a jersey or flag. This celebratory behavior suggests he may have played a key role in the goal being scored. Throughout these scenes, there are glimpses of spectators in the background adding context to the setting of a live sports event. Additionally, text overlays provide information about the specific game details including the date \"23/04/2017\" and the opposing team \"Real Madrid,\" situating the events within a particular timeframe and competition context.", + "tokens_per_second": 10.245622255156498, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 253, + "inference_time": 42.952744007110596, + "cpu_usage": 37.7, + "cpu_core_utilization": [ + 19.0, + 25.2, + 47.2, + 59.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_009.mp4": { + "response": "The video captures a sequence of events from a football match between two teams, one wearing blue and the other in red-and-white striped jerseys. Initially, players are seen on the field actively engaging with each other as they attempt to control and pass the ball amidst their opponents' defensive efforts. As the game progresses, there is visible movement towards the goal area where intense action unfolds near the penalty box. The player in the blue jersey makes significant advances toward scoring, maneuvering past defenders but ultimately facing resistance that leads to him falling down. Despite this setback, he quickly recovers and continues his pursuit of the goal. In a climactic moment, the player manages to evade multiple challenges by opposing team members and successfully scores a goal. This achievement elicits an enthusiastic reaction from both teammates celebrating around him and fans in the stands who respond with cheers and applause. 
The celebration underscores the significance of the score within the context of the ongoing league matches displayed throughout the clips provided.", + "tokens_per_second": 9.668944653220052, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 205, + "inference_time": 38.62101435661316, + "cpu_usage": 37.4, + "cpu_core_utilization": [ + 29.2, + 59.9, + 33.9, + 26.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_010.mp4": { + "response": "The video captures a series of moments from two different football matches, each with its own set of events. In the first part of the footage, we see players in light blue and dark purple kits engaging in gameplay on a green pitch. The sequence starts with individual movements where one player is seen running towards the ball while others are scattered around. As the action unfolds, there's an attempt to control or pass the ball by these individuals amidst defensive stances shown by some opponents. Eventually, the scene transitions to what appears to be a critical moment near the goal area as a goalkeeper in yellow attempts to block a shot made by a player in a dark kit who has dribbled past several defenders.\n\nThe second segment shifts focus to another match involving teams wearing red and blue striped jerseys against those in white. This portion begins similarly with close-up shots of the game, highlighting the movement of the ball across the grassy field and quick passes between team members. Notably, this time it shows a successful play leading to celebration among the scoring team, indicating that they have scored a goal despite efforts by the opposing side represented in white. 
Throughout both parts of the video, text overlays provide context about the specific matches being depicted\u2014one dated 18/10/2014 versus Eibar and the other occurring on 09/11/2019 vs Celta de Vigo.", + "tokens_per_second": 10.571052792003025, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 293, + "inference_time": 46.55736207962036, + "cpu_usage": 58.2, + "cpu_core_utilization": [ + 47.8, + 64.3, + 48.6, + 72.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_011.mp4": { + "response": "The video captures a series of soccer matches, focusing primarily on the goal area and key moments leading up to goals. Initially, it shows a close-up view of a damaged net with visible tension wires, suggesting an intense moment during play. The scene then shifts to a player wearing a blue and red striped jersey celebrating, indicating that his team may have scored or achieved something significant in the game.\n\nSubsequently, another frame highlights the same player raising both hands in what appears to be a gesture of triumph or acknowledgment towards someone off-camera. This is followed by scenes from different games where players are seen dribbling past opponents near the penalty box, creating scoring opportunities. One particular sequence features multiple attempts at shooting the ball into the net, culminating in a successful score. Throughout these sequences, various shots capture the reactions of teammates and the goalkeeper attempting to block the shot but failing due to precise placement of the ball.\n\nThe final part of the video includes dynamic action shots of players competing for control of the ball amidst defensive pressure from their opponents. There's also a brief glimpse of fans in the stands cheering enthusiastically, adding to the atmosphere of excitement typical of live sports events. 
Overall, the footage provides a snapshot of high-stakes soccer gameplay, highlighting critical interactions between players as they strive to outmaneuver each other within tight spaces around the goal area.", + "tokens_per_second": 10.543281182614821, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 285, + "inference_time": 45.94058084487915, + "cpu_usage": 37.9, + "cpu_core_utilization": [ + 35.8, + 33.7, + 38.4, + 43.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_012.mp4": { + "response": "The video captures a sequence of events from a soccer match, showcasing the dynamic nature of the game. It begins with players in blue and white jerseys actively engaged on the field. The focus then shifts to a player in a blue jersey who is dribbling past opponents towards the goal area, displaying skillful maneuvering under pressure. As the action unfolds, another player in a blue jersey makes a decisive pass leading to an attempt at scoring near the net, where the goalkeeper is alerted by the movement of the ball.\n\nThe narrative continues as the scene transitions to show a close-up interaction between a player in a purple jersey and a defender wearing green, highlighting moments of intense competition for control of the ball. This leads into a pivotal moment where the player in purple successfully scores a goal, evidenced by his celebration with teammates nearby, indicating a successful play or victory.\n\nFollowing this triumphant moment, there's a shift back to gameplay involving multiple players, suggesting ongoing efforts to either defend against goals or advance positions strategically. 
The intensity escalates when a shot at the goal is blocked by the defending team, capturing a critical defensive effort that halts the offensive push.\n\nThe concluding segments depict further attempts at scoring within the same context, showing varied outcomes such as shots being saved or missed, emphasizing the unpredictable elements inherent in soccer matches. Throughout these sequences, the camera work effectively highlights key actions while maintaining viewer engagement through rapid cuts between different parts of the pitch, underscoring the fast-paced environment typical of competitive sports like soccer.", + "tokens_per_second": 10.81279707721416, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 322, + "inference_time": 48.59676718711853, + "cpu_usage": 35.6, + "cpu_core_utilization": [ + 25.0, + 52.5, + 36.8, + 28.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_013.mp4": { + "response": "The video depicts a series of events from a football match. Initially, the focus is on an intense moment where players in yellow and blue jerseys are engaged near the goal area with one player attempting to score while another tries to block it. The scene transitions smoothly into subsequent actions showing various attempts at scoring or defending goals by both teams throughout different parts of the field.\n\nAs the action unfolds, we see multiple instances of gameplay including dribbling past opponents, strategic passes among teammates, and defensive maneuvers aimed at preventing goals. Notably, there's a sequence highlighting a goalkeeper making saves during critical moments when opposing team players were trying to get close to the net. This indicates high-stakes competition typical of professional matches.\n\nThe latter part of the footage captures celebrations indicating successful plays such as goals being scored, followed by jubilant reactions from players who likely contributed to these achievements. 
Celebrations often involve gestures like pointing upwards which could symbolize triumph or acknowledgment of a significant play.\n\nTowards the end, the camera angle shifts back to broader views of the pitch displaying ongoing gameplay dynamics between two sets of players wearing distinct kits representing their respective teams. There's also a brief interlude showcasing textual information about the game context, possibly providing details relevant to viewers not familiar with real-time commentary.\n\nOverall, this compilation offers insights into competitive football through its depiction of key moments that encapsulate strategy, athleticism, and emotional responses intrinsic to the sport.", + "tokens_per_second": 10.750196309237642, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 312, + "inference_time": 47.03905153274536, + "cpu_usage": 33.7, + "cpu_core_utilization": [ + 22.7, + 33.2, + 16.0, + 63.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_014.mp4": { + "response": "The video captures a sequence from a football match between two teams, one in blue and the other in white. The action begins with a player wearing number 10 dribbling past defenders towards the goal area. As he approaches the penalty box, he makes contact with the ball but it appears to be deflected or blocked by an opposing team member. Despite this setback, the player continues his advance, maintaining control of the ball while being closely marked by opponents. His determination is evident as he maneuvers through tight spaces, showcasing agility and skill. Eventually, the player manages to maneuver past multiple defenders near the goal line before making a decisive play that leads to a successful shot on goal. The moment of impact shows the ball hitting the back of the net, indicating a scoring attempt has been made. 
Following the success, players from both teams can be seen reacting to the outcome\u2014celebrating for the scorer and possibly regrouping strategically based on the situation post-goal. The background features elements typical of a professional football stadium, including spectators and advertising boards along the sidelines.", + "tokens_per_second": 9.95437319443051, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 227, + "inference_time": 40.29177522659302, + "cpu_usage": 33.9, + "cpu_core_utilization": [ + 14.9, + 36.2, + 35.4, + 49.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_015.mp4": { + "response": "The video captures a sequence of events from two different soccer matches. In the first segment, set during the Champions League match between FC Barcelona and Juventus on 12/09/2017, we see a player in a blue jersey with orange accents preparing to take a free kick against players dressed in black and white jerseys. The scene transitions smoothly as he runs up and kicks the ball towards the goal area where other players are positioned defensively. The tension escalates when the shot is taken; however, it misses the target and lands outside the penalty box. Following this attempt, the focus shifts briefly to another part of the field before returning to the initial play, capturing more interactions among the players involved in the ongoing game.\n\nIn contrast, the second portion of the footage documents a La Liga match involving Huesca versus an opposing team that wears red and purple kits. This section starts similarly with gameplay action near the opponent's goalpost but quickly changes pace upon scoring a goal by a player in dark attire who successfully navigates past defenders and takes a decisive shot at the net. Celebrations ensue afterward amongst teammates and fans alike, highlighting the momentous nature of the score. 
As the celebration continues, additional scenes show further attempts made by both teams within their respective halves while maintaining competitive spirit throughout these sequences captured live-action style typical for sports broadcasts aiming to provide viewers with dynamic visual experiences reflecting real-time sporting events.", + "tokens_per_second": 10.678644069294082, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 302, + "inference_time": 46.50194501876831, + "cpu_usage": 34.2, + "cpu_core_utilization": [ + 30.7, + 18.2, + 50.7, + 37.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_016.mp4": { + "response": "The video depicts a sequence of events from two different soccer matches. Initially, it showcases an intense moment in the La Liga match between FC Barcelona and Huesca on 02/09/2018, where a player in a blue jersey is seen controlling the ball while being pursued by opponents dressed in white jerseys with red crosses. The scene captures the dynamic nature of the game as players maneuver to gain possession or create scoring opportunities.\n\nFollowing this, the focus shifts to another part of the same La Liga encounter, highlighting a critical play near the goal area involving players wearing yellow and pink kits. A player in yellow attempts to advance past defenders clad in pink, culminating in a shot at the goal that results in a save by the goalkeeper. This segment emphasizes offensive efforts against defensive tactics within a competitive setting.\n\nThe narrative then transitions to a Champions League fixture between PSV and an opposing team on 28/11/2018. Here, attention is drawn to gameplay dynamics centered around the midfield region, featuring interactions among players in contrasting kit colors: one team in yellow and the other in red-and-white striped uniforms. 
These moments reflect strategic positioning and movement crucial for maintaining control over the ball amidst pressure from adversaries.\n\nSubsequently, scenes unfold depicting continued action during the same Champions League match, focusing on plays leading up to potential goals. Players are observed executing passes and dribbles aimed at breaking through opponent defenses and capitalizing on scoring chances. Notably, there's emphasis on the interplay between attackers seeking avenues towards the net and defenders attempting to thwart these advances.\n\nThroughout these sequences, visual cues such as text overlays provide context regarding the teams involved (\"FC Barcelona vs Huesca,\" \"PSV vs [opponent],\" and \"La Liga 29/10/2019: vs Real Valladolid\") alongside dates indicating when each event occurred. Additionally, branding elements like logos suggest media coverage or sponsorship affiliations associated with the depicted matches. Overall, the compilation offers viewers insights into various facets of professional soccer competitions, showcasing both individual skillful maneuvers and collective team strategies employed across multiple fixtures.", + "tokens_per_second": 11.436392353304559, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 462, + "inference_time": 57.417115211486816, + "cpu_usage": 31.0, + "cpu_core_utilization": [ + 32.8, + 51.8, + 16.3, + 23.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_017.mp4": { + "response": "The video captures a sequence of events from two distinct soccer matches. The first part features FC Barcelona in action against Real Valladolid on October 29, 2019, during La Liga play. It begins with an intense moment near the goal where players are engaged in a crucial play; one player is seen attempting to score while another tries to block it. The goalkeeper, dressed in red, dives towards his right in an effort to save the shot. 
As the scene progresses, there's visible celebration among some team members wearing blue and maroon jerseys, suggesting that they may have scored or made significant plays leading up to this point.\n\nTransitioning to the second part of the video, we see scenes from a Copa del Rey match between Malaga and FC Barcelona on January 16, 2013. This segment highlights gameplay focusing on movement across the field, showcasing strategic positioning by both teams as they attempt to advance the ball towards the opponent's goal. Players in dark uniforms display coordination and agility as they navigate through opposing defenders clad in light-colored kits. Notably, there\u2019s emphasis on moments when individual players either pass the ball skillfully to teammates or make attempts at scoring opportunities under defensive pressure.\n\nThroughout these clips, text overlays provide context about each game including dates, locations, competition names like \"La Liga\" and \"Copa del Rey,\" along with identifiers such as \"82\" for the Bar\u00e7a vs. Real Valladolid encounter and \"81\" for the earlier match involving Malaga. These details help frame the footage within specific sporting contexts and timeframes.", + "tokens_per_second": 10.977650622627742, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 348, + "inference_time": 50.171597719192505, + "cpu_usage": 33.8, + "cpu_core_utilization": [ + 29.7, + 20.5, + 31.8, + 53.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_018.mp4": { + "response": "The video captures a series of soccer match moments, primarily focusing on the interactions between players and their movements around the field. It starts with an overview of a game in progress where several players are seen moving across the pitch, engaging in typical gameplay activities such as running, passing, and positioning themselves strategically near the goal area. 
The setting is clearly a professional or semi-professional environment given the presence of spectators in the background and visible advertisements along the sidelines.\n\nAs the sequence continues, there's a shift towards more detailed actions involving individual player efforts to control and advance the ball. One particular moment shows a close-up of a player dribbling past opponents, highlighting the physicality and skill involved in maintaining possession under pressure from defenders. This segment emphasizes agility and quick decision-making by the player attempting to navigate through the opposing team's defense.\n\nFurther into the footage, we observe attempts at scoring opportunities leading up to potential shots on goal, underscoring the tense atmosphere typically present during critical parts of a match. Players position themselves defensively while others prepare for offensive plays, showcasing both sides of the competitive nature of the sport.\n\nTowards the end of the provided frames, the focus narrows down to specific instances of play where the ball is passed among teammates or intercepted by different individuals, reflecting the dynamic flow of the game. Each frame encapsulates a snapshot of movement that contributes to the overall narrative of strategic maneuvers and tactical execution within the context of a live soccer match.", + "tokens_per_second": 10.787881404092886, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 314, + "inference_time": 46.91726207733154, + "cpu_usage": 33.3, + "cpu_core_utilization": [ + 15.0, + 23.0, + 64.7, + 30.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_019.mp4": { + "response": "The video captures a series of moments from two distinct football matches. Initially, it showcases an intense moment where the ball is shot towards the goal during what appears to be La Liga match between 01/05/2005 and Albacete. 
The action culminates in a celebration by players wearing blue and red striped jerseys, indicating they are likely part of Barcelona FC given their iconic colors. This scene transitions smoothly into another sequence depicting a different setting with a player in a similar jersey celebrating on the field amidst cheering fans.\n\nAs the narrative progresses, there's a shift back to more formal gameplay settings involving teams dressed in purple and green stripes. These scenes highlight interactions near the center circle before transitioning to various stages of play including players positioning themselves for kick-off and engaging directly with each other. Throughout these sequences, referees can be seen making critical decisions which influence the flow of the game. The intensity heightens as confrontations occur among players leading up to crucial plays such as free kicks or penalties.\n\nThe final frames focus closely on individual reactions possibly post-play actions like contemplation or communication amongst teammates reflecting the emotional rollercoaster inherent in competitive sports. Overall, this compilation offers viewers an insightful glimpse into both celebratory highs and tense lows experienced within professional football environments across multiple significant fixtures.", + "tokens_per_second": 10.528072417044749, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 280, + "inference_time": 45.3060781955719, + "cpu_usage": 34.1, + "cpu_core_utilization": [ + 66.5, + 26.9, + 29.2, + 13.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_020.mp4": { + "response": "The video captures a sequence from a football match, specifically focusing on the goal-scoring moments. Initially, players are seen preparing for an attack near the opponent's penalty area with visible anticipation and strategic positioning. 
As the action unfolds, one of the players is observed taking a decisive shot at the goal, which leads to a dynamic moment where the ball approaches the net, indicating a potential score. The goalkeeper rushes out in response but fails to reach the ball before it crosses the line, suggesting that a goal has been scored. This pivotal event elicits reactions from both teams; while the scoring team celebrates enthusiastically, their opponents display disappointment or contemplation over the missed opportunity.\n\nFollowing this intense play, there appears to be another attempt by the attacking side to capitalize on any remaining openings within the defensive setup of the opposing team. Despite vigorous efforts and coordination among teammates, the defending squad successfully prevents further advancement into dangerous areas, maintaining control of the game situation. Throughout these sequences, the crowd\u2019s presence adds to the atmosphere, reflecting the high stakes involved in such critical moments of the match.", + "tokens_per_second": 10.013143127227265, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 230, + "inference_time": 40.78685998916626, + "cpu_usage": 50.2, + "cpu_core_utilization": [ + 55.2, + 35.5, + 70.4, + 39.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_021.mp4": { + "response": "The video captures a series of intense moments from two distinct soccer matches. Initially, it showcases gameplay involving players in white and purple jerseys competing against opponents dressed in dark colors with red accents on the field. The focus is primarily on the action near the goal area where multiple attempts are made to score or defend goals. 
A player wearing number 3 on his jersey prominently features throughout these sequences as he interacts directly with the ball and other players within close proximity to the goalpost.\n\nAs the match progresses, attention shifts towards more detailed interactions between specific individuals such as a goalkeeper donning an orange uniform who is actively involved in defending their team's net. This segment highlights critical defensive plays by this individual amidst ongoing offensive efforts by opposing teams trying to breach the defense line.\n\nTransitioning into another phase of play, we observe different scenes featuring various players engaged in strategic movements across the pitch while being pursued by defenders clad in pink attire. These segments emphasize continuous motion dynamics typical of competitive sports settings like football (soccer) wherein coordination among teammates alongside tactical positioning becomes paramount for success.\n\nThroughout both parts depicted here \u2013 one set during La Liga versus Sevilla dated February 23rd, 2019 and another seemingly representing Club World Cup game held on December 19th, 2008 against Estudiantes \u2013 there exists an overarching narrative revolving around athletic prowess coupled with situational awareness crucial for achieving victory through scoring objectives or thwarting adversaries\u2019 advances.", + "tokens_per_second": 10.80753428151764, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 318, + "inference_time": 47.44177961349487, + "cpu_usage": 33.8, + "cpu_core_utilization": [ + 21.7, + 32.7, + 49.9, + 30.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_022.mp4": { + "response": "The video captures a series of events during a football match. It begins with a player in a pink jersey, presumably the goalkeeper given his attire and position on the field, reacting to an event occurring off-screen or within the game itself. 
The text overlay indicates that this moment is from the \"Copa World Cup 19/12/2008 vs Estudiantes,\" suggesting it's part of a historical tournament footage.\n\nAs the sequence progresses, there are shots focusing more closely on the same individual who appears to be engaged in some form of interaction, possibly celebrating or expressing emotion due to what might have transpired during the match. Another person wearing similar colors enters the frame, hinting at team dynamics or camaraderie among players.\n\nThe mood shifts towards celebration as multiple individuals dressed in matching pink jerseys come together, hugging each other and reveling in their shared experience. This suggests they may have achieved something significant like scoring a goal or winning the match. There's also mention of media personnel present, indicated by someone holding a camera, capturing these moments for broadcast purposes.\n\nThe scene then transitions back onto the playing field where gameplay continues. Players can be seen strategizing and moving about, indicating active participation in the ongoing match. The aerial perspective provides a comprehensive view of the unfolding action on the pitch.\n\nFinally, the focus narrows down to a critical point near the goalpost where a shot seems imminent. A close-up shows the ball approaching the net, followed by a brief glimpse of the goalkeeper attempting to make a save. Despite efforts, the outcome hints at a possible score being made.\n\nThroughout the various frames, overlays provide context such as dates and opponents involved, enriching viewers' understanding of the specific matches depicted. 
Overall, the video encapsulates elements typical of sports coverage\u2014player reactions, strategic play, celebrations, and pivotal moments leading up to potential goals.", + "tokens_per_second": 11.271056271043205, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 406, + "inference_time": 54.01884579658508, + "cpu_usage": 32.1, + "cpu_core_utilization": [ + 14.2, + 64.7, + 38.0, + 11.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_023.mp4": { + "response": "The video captures a sequence of events from two distinct soccer matches. Initially, it shows an intense moment during the La Liga match between Real Madrid and Barcelona on April 23, 2017. A player in white is seen advancing with the ball near the sideline, pursued by opponents dressed in blue and red. As he approaches the penalty area, his teammates position themselves strategically to support him. The tension builds as the goalkeeper, clad in green, prepares for action behind the goal line, anticipating a potential shot or pass into the box.\n\nTransitioning to another scene, this time set against the backdrop of a Champions League encounter between Olympique Lyon and PSG on March 13, 2019, we witness a different phase of play. Players wearing orange are actively engaged near the opposing team's goalpost. One particular player demonstrates skillful dribbling past defenders, navigating through tight spaces while maintaining control over the ball. His agility allows him to evade challenges posed by opponents in dark-colored jerseys.\n\nThroughout these moments, the camera angles shift subtly but effectively capture the dynamic nature of both matches\u2014highlighting offensive maneuvers, defensive strategies, and crucial interactions at key areas such as the midfield and penalty boxes. 
Each frame encapsulates the essence of competitive soccer, emphasizing individual prowess amidst collective team efforts.", + "tokens_per_second": 10.584057777083409, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 287, + "inference_time": 45.303508281707764, + "cpu_usage": 33.8, + "cpu_core_utilization": [ + 27.8, + 11.4, + 55.2, + 40.8 + ] + } +} \ No newline at end of file diff --git a/API_Transformers/outputs/mini/mini_60s.json b/API_Transformers/outputs/mini/mini_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..fdf3d1d0edb2c29f8d46955e3c0d444d78cc08d8 --- /dev/null +++ b/API_Transformers/outputs/mini/mini_60s.json @@ -0,0 +1,282 @@ +{ + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_001.mp4": { + "response": "The video showcases a series of moments from various soccer matches, primarily focusing on the celebration and actions surrounding Lionel Messi. Initially, it presents several shots of individuals in colorful jerseys expressing joy and excitement, indicative of goals or significant plays during games. These sequences are interspersed with black screens displaying \"PRESENTED BY WOUVA,\" suggesting this content is part of a curated highlight reel by Wouva media outlet.\n\nAs the video progresses, there's an emphasis on Messi himself through different scenes where he interacts with fans, celebrates with teammates, and executes skillful maneuvers on the field. The background consistently features blurred spectators, highlighting the intensity and focus on his performances rather than the environment around him.\n\nThe narrative then shifts to specific match highlights, such as Messi scoring against Malaga and Valencia, capturing key moments like dribbling past defenders, making precise passes, and ultimately netting crucial goals that contribute significantly to his goal tally. 
Each scene underscores not just the athletic prowess but also the emotional highs experienced within these pivotal game instances.\n\nOverall, the compilation serves to celebrate Messi's contributions to football, emphasizing both individual brilliance and collective team success captured over multiple seasons and competitions.", + "tokens_per_second": 10.459373498424311, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 268, + "inference_time": 41.88946795463562, + "cpu_usage": 17.0, + "cpu_core_utilization": [ + 15.1, + 18.5, + 27.0, + 7.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_002.mp4": { + "response": "The video captures a series of soccer matches, highlighting key moments and interactions among players. It begins with an intense scene where a player in blue and red stripes attempts to score against defenders wearing white jerseys near the goalpost, while the goalkeeper in orange tries to block the shot. The action quickly transitions to another match featuring similar teams, showcasing a celebration between two players from the team in blue and red stripes after what appears to be a successful play or goal.\n\nAs the narrative unfolds, we see gameplay footage from various angles, focusing on offensive plays by the team in blue and red stripes as they maneuver around opponents dressed in yellow jerseys. A sequence shows a strategic build-up towards the opponent's goal, culminating in a critical moment where a player in blue and red takes a decisive shot at the goal, resulting in a close save by the opposing goalkeeper.\n\nThe excitement continues with more scenes from different games, emphasizing crucial actions such as passing, dribbling, and scoring efforts. There are also glimpses of individual celebrations and reactions from both players and spectators, reflecting the emotional highs and lows inherent in competitive sports. 
Throughout the video, text overlays provide context about the specific league (La Liga) and dates when these events occurred, adding depth to the visual storytelling presented through this compilation of memorable soccer moments.", + "tokens_per_second": 10.521444099900755, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 279, + "inference_time": 45.17521619796753, + "cpu_usage": 34.8, + "cpu_core_utilization": [ + 27.2, + 30.1, + 57.7, + 24.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_003.mp4": { + "response": "The video showcases a series of football matches highlighting key moments and goals. Initially, it features an intense moment where a player in yellow attempts to score against a goalkeeper amidst the net's backdrop, with text indicating a match from LaLiga on 26/02/2012 versus Atletico Madrid. The focus then shifts to another scene marked by \"96. clubWorld Cup 18/12/2011 vs Santos,\" depicting players in action near the goalpost, culminating in a successful shot past the goalkeeper, followed by scenes capturing celebrations among team members. Another sequence highlights a similar setting for \"95. LaLiga 2012/2014 vs Cordoba,\" focusing again on critical plays leading up to a scored goal, concluding with jubilant reactions from the scoring team. Lastly, the video transitions to \"94. Champions League 24/11/2015 vs AS Roma,\" showcasing dynamic gameplay and ultimately resulting in a goal that leads to celebration amongst players wearing blue and red jerseys.", + "tokens_per_second": 10.026139095405306, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 232, + "inference_time": 41.03426647186279, + "cpu_usage": 34.5, + "cpu_core_utilization": [ + 49.0, + 20.1, + 32.4, + 36.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_004.mp4": { + "response": "The video captures a series of moments from football matches, highlighting key actions and events. 
It begins with a player in white dribbling the ball while being pursued by opponents dressed in red and blue. As the action unfolds, players are seen running towards each other, indicating an ongoing match scenario where both teams vie for control over the ball. The focus then shifts to a critical moment near the goal area as multiple players converge, suggesting an attempt at scoring or defending against a shot on goal.\n\nSubsequently, there is intense competition around the net with attempts made to either score or block goals, showcasing the dynamic nature of football gameplay. A notable sequence involves a player in a maroon jersey making significant moves toward the goal, culminating in what appears to be a successful play leading to a celebration among team members. This indicates that the attacking team has managed to capitalize on their efforts.\n\nThe narrative transitions to another part of the game featuring different colored jerseys, implying a change in possession or phase within the same match or possibly between two separate games. Players engage actively in passing and positioning themselves strategically across the field, demonstrating continuous movement and tactical plays typical of competitive football settings.\n\nThroughout these sequences, the background consistently shows a stadium setting filled with spectators, underscoring the live sports event atmosphere. Text overlays provide context about specific matches, such as \"94. Champions League 24/11/2015 Vs AS Roma\" and \"93. 
LaLiga 23/04/2017 vs Real Madrid,\" which helps identify the locations and dates of these events, adding historical significance to the captured moments.", + "tokens_per_second": 11.01241993759931, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 350, + "inference_time": 49.76952075958252, + "cpu_usage": 32.8, + "cpu_core_utilization": [ + 26.8, + 13.1, + 79.1, + 12.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_005.mp4": { + "response": "The video captures various moments from a football match, highlighting key actions and celebrations. Initially, players in blue jerseys are seen advancing the ball towards the goal area while being pursued by opponents in red-and-white striped jerseys. The scene shifts to show an intense moment where multiple players converge around the ball near the penalty box, indicating a possible scoring opportunity or defensive play.\n\nSubsequently, we witness a player in a yellow jersey attempting to clear the ball away from danger, followed by scenes of celebration among team members wearing blue and red stripes, suggesting that they have successfully scored a goal. This is further emphasized as one particular player runs with arms outstretched, expressing joy and triumph on the field.\n\nThe narrative then transitions to another part of the game featuring different teams playing against each other. Players in light blue jerseys face off against those in dark blue, engaging actively in passing and maneuvering the ball across the pitch. A significant event occurs when a shot at the goal is made, leading to visible reactions both from the attacking side celebrating and the defending side possibly preparing for subsequent plays.\n\nThroughout these sequences, there's consistent movement involving dribbling, tackling, and strategic positioning, typical of competitive soccer matches. 
Celebratory gestures such as hugs and pats on the back signify camaraderie and shared success amongst teammates after goals are scored.", + "tokens_per_second": 10.591706857753069, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 288, + "inference_time": 45.01399254798889, + "cpu_usage": 34.2, + "cpu_core_utilization": [ + 40.7, + 15.3, + 26.2, + 54.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_006.mp4": { + "response": "The video features a series of soccer match highlights, showcasing various moments from La Liga games. It begins with an intense moment where the ball is heading towards the goalpost amidst the netting, suggesting a possible scoring opportunity or near-goal situation. The scene transitions to a player in a blue and red jersey who appears to be celebrating, likely after contributing to a significant play such as a goal.\n\nSubsequently, the focus shifts to gameplay action between two teams on the field. Players are seen strategizing and moving across the pitch, indicating ongoing competition for control of the game's pace and direction. This segment captures the dynamic nature of football, highlighting players running, passing, and positioning themselves effectively within the confines of the playing area.\n\nAs the sequence progresses, there is another glimpse at the crowd, which adds to the atmosphere by showing spectators watching intently, reflecting their engagement with the unfolding events on the field. Following this, we see a close-up shot of a goalkeeper making a save attempt against a challenging shot, emphasizing defensive efforts during crucial moments of the match.\n\nThe narrative continues with more footage of active gameplay involving multiple interactions among players attempting to advance the ball past opponents while evading defenders. 
There's also a particular emphasis on one player skillfully maneuvering the ball forward, potentially setting up a strategic offensive move.\n\nThroughout these scenes, text overlays provide context regarding specific matches and dates, identifying them as part of La Liga competitions held over different years. These annotations help viewers understand when and where each highlight takes place within the broader scope of professional soccer leagues.", + "tokens_per_second": 10.896315328259965, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 333, + "inference_time": 49.55803036689758, + "cpu_usage": 34.5, + "cpu_core_utilization": [ + 17.4, + 61.3, + 13.1, + 46.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_007.mp4": { + "response": "The video showcases a series of soccer matches, highlighting key moments and celebrations from each game. It begins with a scene where players in blue and red jerseys are actively engaged on the field during what appears to be an intense match against yellow-jerseyed opponents. The focus then shifts to specific actions such as kicking the ball into the goalpost by a player wearing orange attire, followed by scenes capturing jubilant reactions among team members who have likely scored or contributed significantly to their success. As the sequence progresses, there is evidence of multiple goals being scored across different games, indicated by various shots that include close-ups of the action near the net and wider views showing teams strategizing and executing plays. Celebratory gestures like hugs and high-fives suggest triumphs for both individual players and entire squads. 
The recurring theme throughout these clips emphasizes not only the athletic prowess displayed but also the emotional highs experienced within competitive sports environments.", + "tokens_per_second": 9.590173753322563, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 198, + "inference_time": 38.84540939331055, + "cpu_usage": 35.6, + "cpu_core_utilization": [ + 35.1, + 15.9, + 76.1, + 14.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_008.mp4": { + "response": "The video showcases a series of soccer matches, highlighting key moments and players. It begins with an intense scene from the Champions League match between Barcelona and Juventus on December 9, 2017, where a player in blue is seen preparing to take a free kick against opponents in black and white jerseys. The action transitions smoothly as another sequence shows a different setting during this same game, focusing on dynamic play near the goal area involving multiple players vying for control of the ball.\n\nFollowing that, there's a shift to La Liga featuring Huesca versus an opposing team, captured through various angles including wide shots of the field and close-ups emphasizing critical plays like passes and defensive efforts by both teams. The atmosphere appears charged with excitement from the crowd visible throughout these scenes.\n\nAs the narrative progresses, more highlights are presented within the La Liga context but now involve FC Barcelona facing Real Valladolid. This part emphasizes strategic gameplay with focus shifts onto specific actions such as dribbling past defenders or scoring attempts leading up to goals. 
Notably, one frame captures a moment just before a potential score, showcasing anticipation among players and spectators alike.\n\nThroughout all segments, text overlays provide contextual information about each match's league status, date, opponent names, and occasionally scores if applicable, grounding viewers\u2019 understanding amidst the unfolding athletic drama.", + "tokens_per_second": 10.608430309528273, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 290, + "inference_time": 45.492167711257935, + "cpu_usage": 34.3, + "cpu_core_utilization": [ + 49.6, + 30.9, + 40.9, + 15.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_009.mp4": { + "response": "The video captures a series of football matches, focusing on key moments where goals are scored. Initially, we see players in action during La Liga match between Real Valladolid and an unspecified opponent on October 29, 2019. The sequence starts with a player attempting to score against the goalkeeper dressed in red, who dives to make a save but fails as the ball goes past him into the net. Following this goal attempt, there's a brief focus on individual players wearing blue jerseys, one with number 10 prominently featured, suggesting his significance possibly due to scoring or assisting in the play leading up to the goal.\n\nThe scene transitions to highlight celebrations among team members after what appears to be another significant moment in the game\u2014likely following a successful goal by the same player seen earlier. Celebrations involve raising arms and showing expressions of joy, indicative of triumph. This is followed by scenes from another match, identified as being part of the Copa del Rey tournament held on January 16, 2013, versus Malaga. 
Here, the gameplay involves intense efforts near the penalty area, culminating in multiple attempts at goal which include shots that narrowly miss the target, underscoring the competitive nature of these fixtures.\n\nAs the clips progress, we observe more detailed interactions within the pitch: close-ups show players maneuvering for control over the ball while others anticipate its trajectory towards their opponents\u2019 goal. One particular instance emphasizes a critical pass executed skillfully amidst pressure from opposing defenders, setting up a potential scoring opportunity. Throughout these sequences, the consistent presence of text overlays provides context regarding the specific games and dates involved, enhancing viewer understanding without diverting attention away from the unfolding events on the field.", + "tokens_per_second": 11.082226675537616, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 364, + "inference_time": 50.99328017234802, + "cpu_usage": 33.1, + "cpu_core_utilization": [ + 33.1, + 16.3, + 62.2, + 20.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_010.mp4": { + "response": "The video captures a series of intense moments from various football matches. Initially, it shows an action-packed scene where a player in orange is seen kicking the ball towards the goal, with spectators visible in the background. The focus then shifts to players celebrating on the field, indicating a successful play or possibly winning the match. As the sequence progresses, more scenes depict groups of jubilant players hugging and cheering together, suggesting they are part of the same team that has likely achieved something significant during these events.\n\nTransitioning into later parts of the video, there's a change in context as we see preparations for kickoff between two teams standing at opposite ends of the pitch. 
This leads into gameplay where one side takes possession of the ball and advances toward the opposing goal, creating anticipation among viewers about what will happen next. The tension builds up further when the attacking team makes contact with the ball near the opponent's goal area but fails to score initially. However, this challenge does not dampen their spirits; instead, they continue pressing forward until eventually scoring a goal themselves.\n\nThroughout all these clips, the atmosphere remains charged with excitement and energy typical of live sports events. Each frame conveys different aspects \u2013 from individual actions like kicks and passes to collective celebrations after goals being scored. Overall, this compilation offers a dynamic glimpse into key highlights within competitive football settings.", + "tokens_per_second": 10.551310576211458, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 283, + "inference_time": 44.846466302871704, + "cpu_usage": 33.3, + "cpu_core_utilization": [ + 21.1, + 35.7, + 30.0, + 46.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_011.mp4": { + "response": "The video captures a series of soccer matches, highlighting key moments from different games. It begins with La Liga match between Sevilla and an opposing team on 23/02/2019, showcasing intense gameplay near the goal area where players in white are seen attempting to score while being defended by those in dark jerseys. The sequence progresses through various angles emphasizing player movements, defensive strategies, and scoring attempts within this particular game setting.\n\nTransitioning away from La Liga, the focus shifts briefly to another segment labeled \"77. Club World Cup 19/12/2008 vs Estudiantes,\" indicating a historical context likely related to significant events or memorable goals during that tournament. 
This part displays players wearing pink uniforms engaging actively on the field against opponents dressed in white, suggesting a competitive environment typical of international club competitions.\n\nAs the narrative continues, there's emphasis on individual actions such as passing the ball and positioning for potential shots at goal, capturing the essence of dynamic play. A notable moment involves a goalkeeper making a save, underscoring critical defensive efforts amidst offensive pressure. Additionally, scenes depict celebratory interactions among teammates possibly after achieving a successful play or securing a point, reflecting the emotional highs experienced during sporting events.\n\nThroughout these segments, text overlays provide contextual information about the specific matches, dates, and teams involved, enhancing viewer comprehension without detracting from the visual storytelling. Overall, the compilation offers a glimpse into the strategic complexities and exhilarating moments inherent to professional soccer across diverse tournaments and leagues.", + "tokens_per_second": 10.880078766885976, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 328, + "inference_time": 48.24014067649841, + "cpu_usage": 33.2, + "cpu_core_utilization": [ + 44.6, + 26.5, + 48.9, + 12.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_012.mp4": { + "response": "The video showcases a series of soccer matches, highlighting key moments where players are actively involved in play. Initially, we see a match between La Liga teams on 23/04/2017 against Real Madrid. A player from the opposing team is seen running towards the goal with the ball, pursued by multiple defenders. The goalkeeper prepares to make a save as the action unfolds near the penalty area.\n\nSubsequently, another sequence features a match during the Champions League on 13/03/2019 versus Olympique Lyon. 
This time, the focus is on an attacking move within the penalty box, where a player attempts to score while being closely guarded by opponents. Despite efforts to block the shot or intercept the ball, it remains unclear whether the attempt was successful due to the framing of the footage.\n\nThe narrative then shifts back to the same Champions League encounter involving Olympique Lyon. Here, intense competition for possession occurs just outside the penalty area. One player manages to maintain control despite challenges and maneuvers past several opponents before making a decisive pass toward a teammate positioned closer to the goal. However, this particular moment doesn't result in a clear scoring opportunity visible within the provided frames.\n\nThe final segment revisits the earlier mentioned La Liga game dated 24/02/2008 vs Levante. It captures a critical phase where a player runs down the right wing, dribbling through defensive pressure. His determined advance suggests he might be setting up for a cross or preparing to shoot at the goal. Although his exact intentions aren't fully evident from this single frame, the context implies that he could capitalize on any available openings presented by his teammates.\n\nThroughout these sequences, various elements such as team uniforms, stadium settings, and audience presence contribute to understanding the competitive atmosphere of each match. 
Additionally, text overlays provide contextual information about the league, date, and opponent, enriching our comprehension of these captured highlights without relying solely on visual cues.", + "tokens_per_second": 11.316073008449894, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 419, + "inference_time": 55.462780237197876, + "cpu_usage": 32.8, + "cpu_core_utilization": [ + 30.7, + 22.7, + 65.4, + 12.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_013.mp4": { + "response": "The video showcases a series of soccer matches, capturing key moments and celebrations from various games. It begins with scenes from La Liga on February 24, 2008, featuring players in yellow and purple jerseys competing against those in white shirts, culminating in the celebration of a goal by players in red and blue striped uniforms. The focus then shifts to World Cup Qualification between Colombia and another team, highlighting intense gameplay near the opponent's goal, leading up to a decisive moment where a player in light blue scores past the goalkeeper, followed by jubilant reactions among teammates.\n\nSubsequent segments continue this theme, depicting more goals scored during these qualifiers, including efforts made by teams wearing different colored kits such as dark green and black, and further emphasizing the excitement through celebratory gestures like high-fives and embraces. The narrative extends into other match highlights involving diverse jersey colors, illustrating the dynamic nature of competitive sports. 
Throughout, the emphasis remains on scoring plays and their immediate aftermath, showcasing not only athletic prowess but also the emotional highs experienced by athletes upon achieving success on the field.", + "tokens_per_second": 10.139085446565758, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 240, + "inference_time": 41.62225914001465, + "cpu_usage": 34.4, + "cpu_core_utilization": [ + 43.3, + 14.6, + 67.4, + 12.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_014.mp4": { + "response": "The video showcases a series of soccer matches, highlighting key moments and interactions between players. Initially, we see an intense moment near the goalpost where a player in orange attempts to score against a goalkeeper dressed in black. The scene transitions through various shots focusing on different aspects such as close-ups of facial expressions indicating concentration or disappointment among players, including one prominently featuring Messi with his back turned towards the camera wearing a blue jersey marked \"MESSI\". As the action unfolds, there are scenes capturing dynamic movements like dribbling past opponents, showcasing agility and skillful play. Notably, several instances depict players in blue jerseys advancing toward the opponent's goal while being pursued by defenders clad in white uniforms. The climax includes successful goals leading to jubilant celebrations amongst teammates who embrace each other in triumph. Throughout these sequences, the background consistently features elements typical of a soccer stadium environment, complete with spectators adding to the atmosphere. 
Overall, the footage encapsulates the essence of competitive soccer, emphasizing teamwork, individual prowess, and the exhilaration of scoring pivotal points during crucial match moments.", + "tokens_per_second": 10.099402253183086, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 237, + "inference_time": 41.110923290252686, + "cpu_usage": 33.9, + "cpu_core_utilization": [ + 40.9, + 18.6, + 62.6, + 13.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_015.mp4": { + "response": "The video captures a sequence of events from a soccer match, highlighting key moments involving players in blue and red jerseys. Initially, we see an intense moment where a player in a blue jersey is preparing to kick the ball while being closely guarded by opponents in white and pink jerseys. The scene quickly transitions to show another play with multiple players engaged near the goal area, suggesting a build-up towards scoring or defending goals. As the action unfolds, there's evident movement and strategic positioning among the teams as they vie for control of the game.\n\nSubsequently, the focus shifts to individual celebrations, capturing close-ups of jubilant expressions on the faces of players who have presumably scored a goal. These celebratory scenes are interspersed with broader views of gameplay that emphasize teamwork and competitive spirit. Players can be seen maneuvering skillfully across the field, passing the ball strategically amongst themselves, showcasing their coordination and tactics during the match.\n\nThroughout these sequences, various angles provide insights into different aspects of the game\u2014ranging from solo efforts leading up to potential shots at the goal to collective plays designed to outmaneuver opposing defenses. 
Notably, specific frames highlight interactions between players such as passes, tackles, and possibly set pieces like free kicks or corner kicks which often lead to crucial opportunities for scoring.\n\nThe backdrop consistently features elements typical of a professional sports environment: spectators in stands observing intently, officials overseeing the conduct of the match, and branding visible around the pitch indicating sponsorships and league affiliations. This context enriches our understanding of the setting, underscoring the formal nature of this sporting event likely part of a larger tournament or league standings within the sport.", + "tokens_per_second": 11.047986460564802, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 357, + "inference_time": 50.62872052192688, + "cpu_usage": 33.6, + "cpu_core_utilization": [ + 34.3, + 14.5, + 72.1, + 13.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_016.mp4": { + "response": "The video showcases various soccer matches, capturing key moments and the enthusiastic reactions of spectators. It begins with fans in a stadium clad in blue and white attire cheering on their teams during La Liga match on January 1, 2010, against Tenerife. The focus then shifts to an intense play where players wearing pink jerseys compete for possession near the goal post. As the action unfolds, a player in black scores past the goalkeeper, leading to jubilant celebrations from both the team and supporters. Another sequence features a different game at Copa del Rey between Espanyol and another opponent on January 5, 2016, highlighting successful goals by opposing teams amidst animated crowd reactions. A subsequent scene captures a critical moment where a ball crosses the goal line, suggesting a score has been made. Further along, there's a depiction of gameplay involving multiple players vying for control, culminating in a shot towards the net that is narrowly saved by the goalkeeper. 
Finally, the concluding segments highlight more dynamic plays, including a save attempt by the goalkeeper and scenes filled with anticipation as players navigate through defensive positions, all set within the context of high-stakes soccer competitions.", + "tokens_per_second": 10.276083296878193, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 254, + "inference_time": 42.978641986846924, + "cpu_usage": 34.5, + "cpu_core_utilization": [ + 48.9, + 17.9, + 49.7, + 21.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_017.mp4": { + "response": "The video showcases a series of soccer matches, highlighting key moments where goals are scored. It begins with an intense moment near the goalpost involving players in white and blue uniforms during a match against Copenhagen on October 20, 2010. The scene quickly transitions to another match where a player wearing yellow successfully scores a goal past the goalkeeper dressed in green. This is followed by scenes from La Liga featuring Barcelona's gameplay, including strategic plays leading up to shots at the goal, culminating in a celebration among teammates after scoring. The narrative then shifts back to Sevilla, capturing similar dynamics as players in red and blue striped jerseys compete against those in white, with notable efforts towards scoring that end in triumph for the latter team upon successful goals. Throughout these sequences, the atmosphere within the stadium is palpable, filled with spectators whose reactions underscore the excitement surrounding each play. 
The repeated focus on different teams and their interactions around the goal area emphasizes the pivotal role of goal-scoring actions in determining the outcomes of these competitive matches.", + "tokens_per_second": 9.871157401870102, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 218, + "inference_time": 40.32038760185242, + "cpu_usage": 34.5, + "cpu_core_utilization": [ + 28.7, + 13.0, + 82.0, + 14.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_018.mp4": { + "response": "The video showcases a series of soccer matches, capturing key moments and celebrations from various games. Initially, the focus is on players in blue and red jerseys huddling together possibly celebrating or strategizing during an intense match against Uruguay. The scene then transitions to highlight individual player actions within a game involving Real Sociedad, where a central figure in purple engages dynamically with opponents dressed in white-and-blue striped uniforms. This sequence emphasizes dribbling skills and strategic plays leading up to goal attempts by the team in purple.\n\nSubsequently, attention shifts back to the celebration phase post-goal, showing close-ups of jubilant expressions among teammates wearing similar purple kits as seen earlier. The narrative progresses to another competitive setting between La Liga teams Almeria and Barcelona, marked by possession battles near the opponent's goal area. Players are shown executing passes, making runs towards the ball, and attempting shots at the goal amidst defensive maneuvers by their counterparts in pink and white attire.\n\nThroughout these segments, there's a consistent emphasis on gameplay dynamics\u2014dribbling, passing, shooting efforts, and goalkeeper reactions\u2014alongside emotional highs such as goals scored and subsequent celebrations. 
Each frame captures not only the physical aspects of the sport but also the psychological intensity and camaraderie inherent in professional sports environments.", + "tokens_per_second": 10.527802025402586, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 280, + "inference_time": 44.40972685813904, + "cpu_usage": 33.7, + "cpu_core_utilization": [ + 38.7, + 49.8, + 27.9, + 18.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_019.mp4": { + "response": "The video captures a sequence from a football match featuring two teams, one in dark blue and the other in white. It begins with players spread across the field engaging in typical pre-match warm-up activities before transitioning into active gameplay. The focus shifts to a specific moment where a player wearing number 11 on the dark blue team is seen dribbling towards the goal area, closely pursued by defenders in white jerseys. As the action unfolds, this player attempts to maneuver past opponents but faces defensive pressure near the penalty box.\n\nA critical event occurs as the ball is crossed into the danger zone, leading to an intense struggle for possession between multiple players vying for control of the ball. Amidst this contest, a goalkeeper dressed in yellow makes a crucial save, showcasing agility and skill as he blocks the shot aimed at the net. Following this pivotal play, there's visible celebration among the players in dark blue, suggesting that their efforts may have resulted in either scoring or preventing a goal. 
This scene likely represents a significant point in the game contributing to the overall dynamics observed throughout the footage.", + "tokens_per_second": 9.991467008425984, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 228, + "inference_time": 41.05040717124939, + "cpu_usage": 34.8, + "cpu_core_utilization": [ + 27.7, + 55.4, + 40.1, + 16.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_020.mp4": { + "response": "The video showcases a sequence of soccer matches, highlighting key moments from different games. Initially, it focuses on a match between La Liga teams Alaves and FC Barcelona on December 21, 2019. The footage begins with the goal area where an orange-clad goalkeeper is seen in action against blue and red striped players who are attempting to score. As the game progresses, we see various scenes including player movements around the field, attempts at scoring, and defensive plays by both teams. Notably, there's a scene depicting intense gameplay near the penalty box where multiple players vie for control of the ball.\n\nFollowing this segment, the focus shifts to another match involving Panathinaikos during the Champions League dated September 14, 2010. This part of the video captures dynamic play as players maneuver and pass the ball strategically across the pitch. There are clear indications of offensive efforts towards the opponent's goal, culminating in successful shots or goals being scored amidst reactions from teammates and opponents alike.\n\nThe final portion of the video revisits the earlier mentioned encounter between Barca and Eibar on February 22, 2020. Similar to the previous clips, it features ongoing actions such as dribbling past defenders, strategic passes among team members, and ultimately leading up to potential scores. 
Throughout these sequences, we can observe the excitement and intensity typical of competitive soccer matches, along with the fluidity of movement characteristic of professional athletes.", + "tokens_per_second": 10.789004911780786, + "peak_gpu_memory_mb": 11824.375, + "num_generated_tokens": 314, + "inference_time": 47.12105321884155, + "cpu_usage": 33.5, + "cpu_core_utilization": [ + 26.3, + 11.4, + 57.7, + 38.4 + ] + } +} \ No newline at end of file diff --git a/API_Transformers/outputs/qwen/qwen_30s.json b/API_Transformers/outputs/qwen/qwen_30s.json new file mode 100644 index 0000000000000000000000000000000000000000..633885a03b269558d133d50495a114c708b91c4d --- /dev/null +++ b/API_Transformers/outputs/qwen/qwen_30s.json @@ -0,0 +1,1318 @@ +{ + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_001.mp4": { + "response": "The video captures a soccer match between two teams, with the audience cheering and the players celebrating. The players are wearing red and blue uniforms, and the audience is cheering them on. The video also shows a player with a soccer ball, and the audience is cheering him on. The video ends with the players celebrating and the audience cheering them on.", + "tokens_per_second": 14.871808817485372, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 69, + "inference_time": 11.539864540100098, + "cpu_usage": 13.0, + "cpu_core_utilization": [ + 11.0, + 11.8, + 12.0, + 17.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_002.mp4": { + "response": "The video captures a soccer match between two teams, with the players wearing orange and white jerseys. The match is played on a green field, and the players are seen running around the field, passing the ball to each other. The video also shows a close-up of a player's face, and the ball is seen flying through the air. 
The video appears to be a highlight reel of the match, showcasing the players' skills and the intensity of the game.", + "tokens_per_second": 15.23367933717406, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 92, + "inference_time": 13.470122814178467, + "cpu_usage": 56.0, + "cpu_core_utilization": [ + 72.9, + 43.3, + 62.7, + 44.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_003.mp4": { + "response": "The video captures a soccer match between two teams, with the yellow team scoring a goal. The players on the yellow team are seen celebrating their goal, while the players on the blue team are seen running around the field. The video also shows the yellow team scoring another goal, and the players on the blue team are seen running around the field. The video ends with the yellow team celebrating their victory.", + "tokens_per_second": 15.081603395523958, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 80, + "inference_time": 12.415506362915039, + "cpu_usage": 56.0, + "cpu_core_utilization": [ + 44.3, + 42.4, + 43.2, + 94.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_004.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a player scoring a goal. The player is seen running towards the goal, while the goalkeeper is seen standing in front of the goal. The video also shows the audience watching the game, with some people standing in the stands and others sitting on the sidelines. 
The video ends with the audience cheering for the player who scored the goal.", + "tokens_per_second": 15.126231678377016, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 80, + "inference_time": 12.427834510803223, + "cpu_usage": 55.9, + "cpu_core_utilization": [ + 43.1, + 41.6, + 44.7, + 93.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_005.mp4": { + "response": "The video captures a soccer match between two teams, with the goal being scored by one of the players. The players celebrate their goal, and the video shows them hugging each other. The video also shows the players running around the field, and the camera captures the players from different angles. The video ends with the players hugging each other.", + "tokens_per_second": 15.212259839740025, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 69, + "inference_time": 11.862203598022461, + "cpu_usage": 58.6, + "cpu_core_utilization": [ + 66.9, + 44.0, + 72.7, + 50.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_006.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular player. The player is seen running towards the goal, and a goal is scored. The video also shows other moments from the match, including a player celebrating and a player running towards the goal. The video is shot in slow motion, highlighting the player's movements and the goal being scored.", + "tokens_per_second": 15.212647055565592, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 74, + "inference_time": 12.112043619155884, + "cpu_usage": 58.5, + "cpu_core_utilization": [ + 60.4, + 48.9, + 79.1, + 45.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_007.mp4": { + "response": "The video captures a soccer match between two teams, with the white team scoring a goal. The goal is made by a player wearing a red jersey, who is seen running towards the goal and scoring. 
The video also shows other moments from the same match, including a player wearing a blue jersey scoring a goal and a player wearing a green jersey making a save. The video is shot in slow motion, highlighting the action and the goal.", + "tokens_per_second": 15.061764161404504, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 87, + "inference_time": 13.311593770980835, + "cpu_usage": 56.3, + "cpu_core_utilization": [ + 45.1, + 86.2, + 43.5, + 50.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_008.mp4": { + "response": "The video captures a soccer match between two teams, with the goal being scored by one of the players. The video shows the players celebrating the goal, with one player holding a flag and another player holding a soccer ball. The video also shows the players celebrating the goal, with one player holding a flag and another player holding a soccer ball. The video also shows the players celebrating the goal, with one player holding a flag and another player holding a soccer ball. The video also shows the players celebrating the goal, with one player holding a flag and another player holding a soccer ball. The video also shows the players celebrating the goal, with one player holding a flag and another player holding a soccer ball. The video also shows the players celebrating the goal, with one player holding a flag and another player holding a soccer ball. The video also shows the players celebrating the goal, with one player holding a flag and another player holding a soccer ball. The video also shows the players celebrating the goal, with one player holding a flag and another player holding a soccer ball. The video also shows the players celebrating the goal, with one player holding a flag and another player holding a soccer ball. 
The video also shows the players celebrating the goal, with one player holding a flag and another player holding", + "tokens_per_second": 15.195056179539952, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.07776951789856, + "cpu_usage": 42.2, + "cpu_core_utilization": [ + 28.4, + 34.8, + 22.9, + 82.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_009.mp4": { + "response": "The video captures a soccer match between two teams, with the players wearing red and blue uniforms. The game is intense, with players running around the field and trying to score goals. The video shows several shots of the players scoring goals, with the ball flying into the net and the players celebrating. The video also shows the players running around the field, trying to score goals and keep the ball away from their opponents. The video ends with a shot of the players celebrating, with the ball flying into the net and the players hugging each other.", + "tokens_per_second": 15.094158601086663, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 109, + "inference_time": 13.617509603500366, + "cpu_usage": 50.8, + "cpu_core_utilization": [ + 35.3, + 62.5, + 39.4, + 66.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_010.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a player's goal. The player kicks the ball into the net, and the team celebrates their goal. The video also shows other moments from the game, including a player scoring a goal and a player scoring a goal again. The video is shot in a stadium, and the players are seen running around the field. 
The video is shot in 4k resolution and has a high-quality sound.", + "tokens_per_second": 15.216574058636295, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 93, + "inference_time": 13.189803123474121, + "cpu_usage": 54.9, + "cpu_core_utilization": [ + 52.7, + 40.1, + 82.7, + 44.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_011.mp4": { + "response": "The video captures a soccer match between two teams, with the main focus on a player scoring a goal. The player is seen running towards the goal, while the opposing team tries to block the shot. The video also shows the player celebrating his goal, with teammates congratulating him. The video is shot in slow motion, highlighting the player's skill and the impact of the goal. The video is shot in a stadium, with the audience visible in the background. The video is a great example of a soccer match, with the player's skill and the impact of the goal being the main focus.", + "tokens_per_second": 15.021552487173862, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 119, + "inference_time": 15.394820213317871, + "cpu_usage": 52.7, + "cpu_core_utilization": [ + 36.5, + 55.7, + 42.3, + 76.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_012.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular player. The player is seen kicking the ball into the goal, and the video shows the reaction of the opposing team. 
The video also shows the player celebrating with teammates, and the video ends with the player being shown again.", + "tokens_per_second": 15.13753955334378, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 61, + "inference_time": 11.836745977401733, + "cpu_usage": 60.9, + "cpu_core_utilization": [ + 55.8, + 92.6, + 47.5, + 47.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_013.mp4": { + "response": "The video captures a soccer match between two teams, with the yellow team scoring a goal. The players on the yellow team celebrate their goal, while the players on the green team react to the goal. The video also shows a replay of the goal, with the yellow team celebrating again. The video is shot in slow motion, highlighting the goal and the celebration.", + "tokens_per_second": 15.243910636194112, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 72, + "inference_time": 11.885440111160278, + "cpu_usage": 58.9, + "cpu_core_utilization": [ + 52.1, + 48.5, + 87.9, + 47.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_014.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular player. The player is seen dribbling the ball and making a goal, while the opposing team tries to block the shot. The video also shows other moments from the game, including a player scoring a goal and a player celebrating with teammates. The video is shot in slow motion, highlighting the player's skill and the intensity of the game.", + "tokens_per_second": 15.177171379939264, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 84, + "inference_time": 12.157513618469238, + "cpu_usage": 54.4, + "cpu_core_utilization": [ + 50.8, + 70.9, + 43.4, + 52.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_015.mp4": { + "response": "The video captures a soccer match between two teams, with the blue team scoring a goal in the 85th minute. 
The blue team player is seen celebrating the goal, while the opposing team is seen in disarray. The video also shows the blue team scoring a goal in the 84th minute, with the opposing team in a similar state of confusion. The video is shot from a high angle, providing a bird's eye view of the game. The video is shot in 4k resolution and has a high frame rate, making it a thrilling watch for soccer fans.", + "tokens_per_second": 15.15960774327166, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 118, + "inference_time": 15.13723087310791, + "cpu_usage": 52.8, + "cpu_core_utilization": [ + 43.8, + 40.6, + 87.5, + 39.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_016.mp4": { + "response": "The video captures a soccer match between two teams, with the ball being kicked into the goal by a player in a red jersey. The video also shows a replay of the same moment, highlighting the player's skill and the impact of the kick. The video also shows other moments from the game, including a player in a yellow jersey scoring a goal, and a player in a blue jersey scoring a goal. The video also shows a replay of a player in a red jersey scoring a goal, and a replay of a player in a yellow jersey scoring a goal. The video also shows a replay of a player in a blue jersey scoring a goal, and a replay of a player in a red jersey scoring a goal. The video also shows a replay of a player in a yellow jersey scoring a goal, and a replay of a player in a blue jersey scoring a goal. The video also shows a replay of a player in a red jersey scoring a goal, and a replay of a player in a yellow jersey scoring a goal. The video also shows a replay of a player in a blue jersey scoring a goal, and a replay of a player in a red jersey scoring a goal. 
The video also shows a replay of a player in a yellow jersey scoring a goal, and a replay", + "tokens_per_second": 15.159210675949232, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 23.046422481536865, + "cpu_usage": 41.5, + "cpu_core_utilization": [ + 23.6, + 19.5, + 24.5, + 98.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_017.mp4": { + "response": "The video captures a soccer match between two teams, with the players celebrating and the ball moving around the field. The players are wearing blue and white jerseys, and the game is being played on a green field. The video also shows a replay of a previous game, with the same players and the same ball. The video is shot in 4k resolution and has a high-quality sound.", + "tokens_per_second": 15.273026527404369, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 78, + "inference_time": 12.704134225845337, + "cpu_usage": 58.6, + "cpu_core_utilization": [ + 59.4, + 46.5, + 80.3, + 48.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_018.mp4": { + "response": "The video captures a soccer match between two teams, with the ball moving around the field. The players are seen running around the field, trying to score a goal. The video also shows a replay of a previous goal, with the ball moving around the field again. The players are seen running around the field, trying to score a goal. The video also shows a replay of a previous goal, with the ball moving around the field again. The players are seen running around the field, trying to score a goal. The video also shows a replay of a previous goal, with the ball moving around the field again. The players are seen running around the field, trying to score a goal. The video also shows a replay of a previous goal, with the ball moving around the field again. The players are seen running around the field, trying to score a goal. 
The video also shows a replay of a previous goal, with the ball moving around the field again. The players are seen running around the field, trying to score a goal. The video also shows a replay of a previous goal, with the ball moving around the field again. The players are seen running around the field, trying to score a goal. The video also shows a replay of a previous goal, with the", + "tokens_per_second": 15.166782184633584, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 23.879508018493652, + "cpu_usage": 42.8, + "cpu_core_utilization": [ + 50.4, + 48.1, + 27.7, + 45.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_019.mp4": { + "response": "The video captures a soccer match between two teams, with the players celebrating and the crowd cheering. The players are seen running around the field, and the camera focuses on their movements and expressions. The video also shows a replay of a goal scored by one of the players, and the players are seen celebrating. The video is shot in a stadium, and the players are wearing blue and red uniforms. The video is shot in 4k resolution and has a resolution of 1920x1080.", + "tokens_per_second": 15.058410769565253, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 104, + "inference_time": 14.89810061454773, + "cpu_usage": 53.7, + "cpu_core_utilization": [ + 44.7, + 64.2, + 38.5, + 67.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_020.mp4": { + "response": "The video captures a soccer match between two teams, with the ball being kicked into the goal by a player from the home team. The video also shows other moments from the game, including a player scoring a goal and a player being hit by a ball. The video is shot from a high angle, providing a bird's eye view of the game. 
The video also features a logo for \"Wouva\" and a website for \"bet365\" at the bottom.", + "tokens_per_second": 15.1965029864204, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 96, + "inference_time": 13.389514923095703, + "cpu_usage": 53.4, + "cpu_core_utilization": [ + 70.7, + 41.8, + 61.9, + 39.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_021.mp4": { + "response": "The video captures a soccer match between two teams, with the ball being kicked into the goal by a player from the white team. The video also shows a replay of a previous goal from the same match, highlighting the player's skill and the goal's accuracy. The video also features a replay of a previous match, showcasing the player's ability to score a goal in a different match. The video also features a replay of a previous match, highlighting the player's ability to score a goal in a different match.", + "tokens_per_second": 15.19014004168475, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 102, + "inference_time": 13.887450218200684, + "cpu_usage": 53.9, + "cpu_core_utilization": [ + 89.7, + 39.6, + 43.8, + 42.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_022.mp4": { + "response": "The video captures a soccer match between two teams, with the main focus on the players' movements and interactions. The players are seen running around the field, with one player wearing a pink shirt. The video also shows a goal being scored, and the players are seen celebrating. The video is shot from an aerial perspective, providing a bird's eye view of the game. 
The video is likely a highlight reel of a soccer match, showcasing the players' skills and the excitement of the game.", + "tokens_per_second": 15.279043428402563, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 98, + "inference_time": 13.577979326248169, + "cpu_usage": 53.4, + "cpu_core_utilization": [ + 43.0, + 38.4, + 88.7, + 43.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_023.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular moment. The players are seen running around the field, with one player wearing a green jersey attempting to score a goal. The video also shows a replay of the same moment, highlighting the player's effort and the goal's outcome. The video is shot in a high-definition format, providing a clear view of the action.", + "tokens_per_second": 15.05226145720559, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 80, + "inference_time": 12.87895154953003, + "cpu_usage": 56.9, + "cpu_core_utilization": [ + 51.3, + 84.1, + 43.8, + 48.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_024.mp4": { + "response": "The video captures a soccer match between two teams, with the main focus on a player from the yellow team who scores a goal. The player is seen running towards the goal, while the opposing team tries to block the shot. The video also shows a replay of the goal, highlighting the player's skill and the impact of the shot. The video also features a replay of a previous goal, showing the player's previous performance and the impact of the shot. The video also features a replay of a previous goal, showing the player's previous performance and the impact of the shot. The video also features a replay of a previous goal, showing the player's previous performance and the impact of the shot. The video also features a replay of a previous goal, showing the player's previous performance and the impact of the shot. 
The video also features a replay of a previous goal, showing the player's previous performance and the impact of the shot. The video also features a replay of a previous goal, showing the player's previous performance and the impact of the shot. The video also features a replay of a previous goal, showing the player's previous performance and the impact of the shot. The video also features a replay of a previous goal, showing the player's previous performance and the impact of", + "tokens_per_second": 15.240152338245363, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 23.84521198272705, + "cpu_usage": 43.2, + "cpu_core_utilization": [ + 26.8, + 33.4, + 27.3, + 85.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_025.mp4": { + "response": "The video captures a soccer match between two teams, with the players wearing blue and yellow jerseys. The players are seen running around the field, with one player in a blue jersey jumping and celebrating with his teammates. The video also shows a goal being scored by one of the players, and the players are seen running around the field in celebration. The video appears to be a highlight reel of a soccer match, showcasing the players' skills and teamwork.", + "tokens_per_second": 15.13505504510679, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 89, + "inference_time": 13.333450317382812, + "cpu_usage": 56.3, + "cpu_core_utilization": [ + 41.0, + 47.6, + 47.8, + 88.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_026.mp4": { + "response": "The video captures a soccer match between two teams, with the yellow team scoring a goal. The players are seen running around the field, with the yellow team celebrating their goal. The video also shows a replay of the goal, highlighting the player's skill and the impact of the goal on the game. 
The video is shot in a high-definition format, providing a clear and detailed view of the action.", + "tokens_per_second": 15.050975248385665, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 80, + "inference_time": 12.46246886253357, + "cpu_usage": 57.0, + "cpu_core_utilization": [ + 50.2, + 44.2, + 78.1, + 55.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_027.mp4": { + "response": "The video captures a soccer match between two teams, with the main focus on a player from one team scoring a goal. The player is seen running towards the goal, while the opposing team tries to block the shot. The video also shows other moments from the game, including a player falling to the ground and a player running towards the goal. The video is shot in slow motion, emphasizing the action and movement of the players.", + "tokens_per_second": 15.113742071308003, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 85, + "inference_time": 12.314559698104858, + "cpu_usage": 54.2, + "cpu_core_utilization": [ + 39.5, + 69.8, + 67.7, + 39.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_028.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on the players' movements and interactions. The players are seen running around the field, with one player wearing a blue jersey and the other wearing a white jersey. The video also shows a close-up of the ball, with the players' feet and legs visible. The players are seen running towards the goal, with one player wearing a yellow jersey and the other wearing a blue jersey. The video also shows a close-up of the ball, with the players' feet and legs visible. The players are seen running towards the goal, with one player wearing a yellow jersey and the other wearing a blue jersey. The video also shows a close-up of the ball, with the players' feet and legs visible. 
The players are seen running towards the goal, with one player wearing a yellow jersey and the other wearing a blue jersey. The video also shows a close-up of the ball, with the players' feet and legs visible. The players are seen running towards the goal, with one player wearing a yellow jersey and the other wearing a blue jersey. The video also shows a close-up of the ball, with the players' feet and legs visible. The players are seen running towards the goal, with one player wearing a yellow", + "tokens_per_second": 15.184081740207581, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.241493940353394, + "cpu_usage": 44.3, + "cpu_core_utilization": [ + 53.7, + 38.7, + 44.8, + 40.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_029.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular player. The player is seen running towards the goal, and the camera follows him as he scores a goal. The video also shows other moments from the game, including a player running towards the goal and a player running with the ball. The video is shot in slow motion, highlighting the player's movements and the goal's impact. The video is shot in a stadium, with the audience visible in the background. The video is shot in 4k resolution, providing a high-quality viewing experience.", + "tokens_per_second": 15.08619472557226, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 115, + "inference_time": 14.875043153762817, + "cpu_usage": 53.2, + "cpu_core_utilization": [ + 60.4, + 54.7, + 52.5, + 45.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_030.mp4": { + "response": "The video captures a soccer match between two teams, with the blue team wearing white jerseys and the red team wearing pink jerseys. The blue team scores a goal, and the players celebrate with hugs and high fives. 
The video also shows a replay of the goal, highlighting the players' reactions and the celebration. The video is shot in a stadium with a crowd watching the game.", + "tokens_per_second": 15.171960313899701, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 76, + "inference_time": 12.87618350982666, + "cpu_usage": 59.9, + "cpu_core_utilization": [ + 81.1, + 56.0, + 54.9, + 47.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_031.mp4": { + "response": "The video captures a soccer match between two teams, with the audience in the stands watching the game. The players are seen running around the field, and the ball is seen flying through the air. The video also shows a close-up of the goal, with the audience in the stands watching the game.", + "tokens_per_second": 15.02481834834144, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 60, + "inference_time": 11.396022081375122, + "cpu_usage": 60.1, + "cpu_core_utilization": [ + 52.9, + 95.2, + 46.1, + 46.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_032.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular player. The player is seen running towards the goal, and a goal is scored. The video also shows other moments from the game, including a player falling down and a player scoring a goal. The video is shot in slow motion, highlighting the action and the player's performance.", + "tokens_per_second": 15.037380994065911, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 72, + "inference_time": 12.380358934402466, + "cpu_usage": 59.4, + "cpu_core_utilization": [ + 45.0, + 68.2, + 51.4, + 73.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_033.mp4": { + "response": "The video captures a soccer match between two teams, with the first goal being scored by a player from one team. 
The video shows the players celebrating the goal, and the camera captures the moment from various angles. The video also shows the players hugging each other after the goal is scored.", + "tokens_per_second": 15.003906375038524, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 58, + "inference_time": 10.863168478012085, + "cpu_usage": 57.5, + "cpu_core_utilization": [ + 79.3, + 63.3, + 43.3, + 44.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_034.mp4": { + "response": "The video captures a soccer match between two teams, with the audience in the stands cheering and the players celebrating. The players are seen running around the field, with one player jumping and celebrating. The video also shows a close-up of a soccer ball and a player running towards the goal. The video ends with the players hugging each other.", + "tokens_per_second": 15.165868358135322, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 68, + "inference_time": 11.808286190032959, + "cpu_usage": 57.7, + "cpu_core_utilization": [ + 50.1, + 46.5, + 87.8, + 46.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_035.mp4": { + "response": "The video captures a soccer match between two teams, with the players wearing red and blue uniforms. The players are seen running around the field, passing the ball to each other, and trying to score goals. The video also shows a close-up of a player's hand, which is seen holding a ball. The video appears to be a highlight reel of the match, showcasing the players' movements and the ball's trajectory. 
The video is likely a compilation of moments from the soccer match, highlighting the players' skills and the excitement of the game.", + "tokens_per_second": 15.110408717530474, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 109, + "inference_time": 13.945532083511353, + "cpu_usage": 52.1, + "cpu_core_utilization": [ + 72.4, + 41.1, + 57.4, + 37.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_036.mp4": { + "response": "The video captures a soccer match between two teams, with the players wearing red and blue uniforms. The players are seen running around the field, with one player wearing a red jersey and blue shorts. The video also shows a player wearing a red jersey and blue shorts, who is seen running towards the goal. The video also shows a player wearing a red jersey and blue shorts, who is seen running towards the goal. The video also shows a player wearing a red jersey and blue shorts, who is seen running towards the goal. The video also shows a player wearing a red jersey and blue shorts, who is seen running towards the goal. The video also shows a player wearing a red jersey and blue shorts, who is seen running towards the goal. The video also shows a player wearing a red jersey and blue shorts, who is seen running towards the goal. The video also shows a player wearing a red jersey and blue shorts, who is seen running towards the goal. The video also shows a player wearing a red jersey and blue shorts, who is seen running towards the goal. The video also shows a player wearing a red jersey and blue shorts, who is seen running towards the goal. 
The video also shows a player wearing a red jersey and blue shorts, who is seen running towards", + "tokens_per_second": 15.223225350422531, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 23.9981210231781, + "cpu_usage": 43.6, + "cpu_core_utilization": [ + 48.5, + 27.3, + 71.5, + 27.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_037.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular moment. The players are seen running around the field, with one player scoring a goal by kicking the ball into the net. The players celebrate the goal, and the video ends with a shot of the players hugging each other.", + "tokens_per_second": 15.003346181961264, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 62, + "inference_time": 11.55632758140564, + "cpu_usage": 59.2, + "cpu_core_utilization": [ + 61.4, + 70.8, + 49.2, + 55.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_038.mp4": { + "response": "The video captures a soccer match between two teams, with the players celebrating after a goal. The players are seen running around the field, with one player jumping in the air and another player hugging the player who scored. The video also shows the players celebrating after a goal, with one player jumping in the air and another player hugging the player who scored. The video also shows the players celebrating after a goal, with one player jumping in the air and another player hugging the player who scored. The video also shows the players celebrating after a goal, with one player jumping in the air and another player hugging the player who scored. The video also shows the players celebrating after a goal, with one player jumping in the air and another player hugging the player who scored. 
The video also shows the players celebrating after a goal, with one player jumping in the air and another player hugging the player who scored. The video also shows the players celebrating after a goal, with one player jumping in the air and another player hugging the player who scored. The video also shows the players celebrating after a goal, with one player jumping in the air and another player hugging the player who scored. The video also shows the players celebrating after a goal, with one player jumping in", + "tokens_per_second": 15.172943466192525, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.131529092788696, + "cpu_usage": 43.8, + "cpu_core_utilization": [ + 60.6, + 32.6, + 31.0, + 51.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_039.mp4": { + "response": "The video captures a soccer match between two teams, with the blue team wearing blue shirts and the yellow team wearing yellow shirts. The blue team scores a goal, and the yellow team celebrates. The video also shows a replay of the goal, with the blue team hugging each other in celebration. The video is shot from an aerial perspective, providing a bird's eye view of the game. The video is a great example of the excitement and energy that can be generated by a game of soccer.", + "tokens_per_second": 15.153695587333297, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 99, + "inference_time": 14.337132692337036, + "cpu_usage": 55.1, + "cpu_core_utilization": [ + 71.6, + 43.4, + 46.2, + 59.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_040.mp4": { + "response": "The video captures a soccer match between two teams, with the main focus on a player wearing a yellow jersey. The player is seen running towards the goal, while the opposing team tries to block his path. The video also shows other players from both teams, with one player in a green jersey seen running towards the goal. 
The video appears to be a highlight reel of a soccer match, showcasing the action and excitement of the game.", + "tokens_per_second": 15.232834056432706, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 86, + "inference_time": 12.454859495162964, + "cpu_usage": 55.1, + "cpu_core_utilization": [ + 65.2, + 50.6, + 60.2, + 44.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_041.mp4": { + "response": "The video captures a soccer match between two teams, with the audience in the stands cheering. The players are seen running around the field, and the camera focuses on a particular player who is seen running towards the goal. The video also shows a replay of the same scene, with the player running towards the goal again. The video appears to be a highlight reel of the match, showcasing the players' movements and the audience's reaction.", + "tokens_per_second": 14.954580894213183, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 86, + "inference_time": 12.258346796035767, + "cpu_usage": 52.3, + "cpu_core_utilization": [ + 35.3, + 43.6, + 44.0, + 86.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_042.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular player. The player is seen running towards the goal, and the camera follows him as he scores a goal. The video also shows other moments from the game, including a player celebrating with teammates and a player running with the ball. 
The video is shot in a close-up view, highlighting the player's movements and the goal he scores.", + "tokens_per_second": 15.159302047347232, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 83, + "inference_time": 13.257526397705078, + "cpu_usage": 57.6, + "cpu_core_utilization": [ + 95.1, + 43.6, + 43.1, + 48.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_043.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular player. The player is seen running down the field, making a powerful shot on goal, which is blocked by the goalkeeper. The video also shows other moments from the game, including a player scoring a goal and a player running with the ball. The video is shot in slow motion, highlighting the player's skill and the intensity of the game.", + "tokens_per_second": 15.018958222653303, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 85, + "inference_time": 12.98527455329895, + "cpu_usage": 56.9, + "cpu_core_utilization": [ + 46.8, + 92.5, + 43.2, + 45.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_044.mp4": { + "response": "The video captures a soccer match between two teams, with the blue team scoring a goal. The players are seen running around the field, with the blue team players wearing blue jerseys and the opposing team players wearing white jerseys. The video also shows the audience in the stands, with some people standing up and others sitting down. The video is shot in slow motion, and the camera focuses on the goal and the players around it. 
The video is shot in a stadium, with the audience in the stands watching the game.", + "tokens_per_second": 15.125037916461388, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 103, + "inference_time": 13.790148973464966, + "cpu_usage": 51.8, + "cpu_core_utilization": [ + 38.0, + 78.6, + 38.2, + 52.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_045.mp4": { + "response": "The video captures a soccer match between two teams, with the players wearing red and white uniforms. The players are seen running around the field, passing the ball to each other, and trying to score goals. The video also shows a close-up of a player's face, and the ball is seen flying through the air. The video appears to be a highlight reel of a soccer game, showcasing the players' skills and techniques.", + "tokens_per_second": 15.253951389421339, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 85, + "inference_time": 12.066267251968384, + "cpu_usage": 55.0, + "cpu_core_utilization": [ + 59.4, + 41.3, + 76.1, + 43.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_046.mp4": { + "response": "The video captures a soccer match between two teams, with the players wearing different colored jerseys. The players are seen running around the field, with one player wearing a yellow jersey scoring a goal. The video also shows the players celebrating their goal, with one player jumping in the air and another player holding the ball. The video ends with the players walking away from the field.", + "tokens_per_second": 15.184556314585924, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 74, + "inference_time": 12.241403579711914, + "cpu_usage": 58.4, + "cpu_core_utilization": [ + 59.3, + 45.4, + 80.2, + 48.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_047.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular player. 
The player is seen running with the ball and making a pass, while the opposing team tries to block it. The video also shows a replay of the incident, highlighting the player's skill and the intensity of the game. The video is shot from a high angle, providing a bird's eye view of the action on the field. The video is shot in 4k resolution, providing a high-quality viewing experience. The video is shot in 1080p resolution, providing a clear and detailed view of the action. The video is shot in 1080p resolution, providing a clear and detailed view of the action. The video is shot in 1080p resolution, providing a clear and detailed view of the action. The video is shot in 1080p resolution, providing a clear and detailed view of the action. The video is shot in 1080p resolution, providing a clear and detailed view of the action. The video is shot in 1080p resolution, providing a clear and detailed view of the action. The video is shot in 1080p resolution, providing a clear and", + "tokens_per_second": 15.207603805981988, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 23.65810465812683, + "cpu_usage": 43.6, + "cpu_core_utilization": [ + 36.5, + 29.1, + 84.5, + 24.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_048.mp4": { + "response": "The video captures a soccer match between two teams, with the green team scoring a goal. The players are seen running around the field, and the green team is shown celebrating their goal. The video also shows other moments from the game, including a player falling down and a player running towards the goal. 
The video is shot in slow motion, highlighting the action and movement of the players.", + "tokens_per_second": 15.161178501396146, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 77, + "inference_time": 12.008180141448975, + "cpu_usage": 56.5, + "cpu_core_utilization": [ + 45.3, + 88.5, + 48.9, + 43.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_049.mp4": { + "response": "The video captures a soccer match between Argentina and Brazil. The players are seen running around the field, with one player scoring a goal and celebrating with teammates. The video also shows a shot of a soccer ball flying through the air, and a player wearing a blue and white striped jersey is seen running on the field. The video ends with a shot of the players hugging each other.", + "tokens_per_second": 15.219667630599881, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 77, + "inference_time": 11.618788003921509, + "cpu_usage": 55.2, + "cpu_core_utilization": [ + 59.3, + 41.7, + 75.7, + 44.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_050.mp4": { + "response": "The video captures a soccer match between Argentina and Brazil, with the Argentine team wearing yellow shirts and the Brazilian team wearing blue shirts. The game is intense, with players running around the field and trying to score. The video shows several shots of the game, including a goal scored by a player from Argentina. The video also shows the players celebrating after scoring, with the players from Argentina jumping and the players from Brazil jumping and cheering. 
The video ends with a shot of the players from Argentina celebrating after scoring.", + "tokens_per_second": 15.145118635028181, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 101, + "inference_time": 13.584778547286987, + "cpu_usage": 53.7, + "cpu_core_utilization": [ + 48.5, + 40.6, + 83.8, + 41.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_051.mp4": { + "response": "The video captures a soccer match between two teams, with the players running around the field and the ball being kicked into the goal. The players are wearing different colored uniforms, and the video shows several shots of the game. The video also features a replay of a goal scored by one of the players, and the players are seen celebrating the goal. The video is shot in a stadium, and the players are seen running around the field, with the ball being kicked into the goal. The video is shot in a stadium, and the players are seen running around the field, with the ball being kicked into the goal. The video is shot in a stadium, and the players are seen running around the field, with the ball being kicked into the goal. The video is shot in a stadium, and the players are seen running around the field, with the ball being kicked into the goal. The video is shot in a stadium, and the players are seen running around the field, with the ball being kicked into the goal. The video is shot in a stadium, and the players are seen running around the field, with the ball being kicked into the goal. 
The video is shot in a stadium, and the players are seen running around the field, with the ball being kicked into the", + "tokens_per_second": 15.082250533041343, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.172358512878418, + "cpu_usage": 43.9, + "cpu_core_utilization": [ + 38.6, + 29.1, + 29.3, + 78.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_052.mp4": { + "response": "The video captures a soccer match between two teams, with players from both teams running around the field. The yellow team is wearing yellow shirts, while the blue team is wearing blue shirts. The match is taking place on a green field, and the players are actively engaged in the game. The video also shows a close-up of a soccer ball, which is being dribbled by a player from the yellow team. The video also shows a shot of a soccer player from the blue team, who is running towards the ball. The video also shows a shot of a soccer player from the yellow team, who is running towards the goal. The video also shows a shot of a soccer player from the blue team, who is running towards the goal. The video also shows a shot of a soccer player from the yellow team, who is running towards the goal. The video also shows a shot of a soccer player from the blue team, who is running towards the goal. The video also shows a shot of a soccer player from the yellow team, who is running towards the goal. The video also shows a shot of a soccer player from the blue team, who is running towards the goal. 
The video also shows a shot of a soccer player from the yellow team, who is running towards the", + "tokens_per_second": 15.177486403943025, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 23.567049503326416, + "cpu_usage": 42.5, + "cpu_core_utilization": [ + 42.4, + 25.4, + 52.4, + 49.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_053.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular moment where a player from one team scores a goal. The video shows the players celebrating the goal, with one player jumping in the air and another player holding his head in excitement. The video also shows the players from the opposing team reacting to the goal, with one player jumping in the air and another player holding his head in excitement. The video also shows the players from the same team celebrating the goal, with one player jumping in the air and another player holding his head in excitement. The video also shows the players from the opposing team celebrating the goal, with one player jumping in the air and another player holding his head in excitement. The video also shows the players from the same team celebrating the goal, with one player jumping in the air and another player holding his head in excitement. The video also shows the players from the opposing team celebrating the goal, with one player jumping in the air and another player holding his head in excitement. The video also shows the players from the same team celebrating the goal, with one player jumping in the air and another player holding his head in excitement. 
The video also shows the players from the opposing team celebrating the goal, with one player jumping in the air and another", + "tokens_per_second": 15.081825570037651, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.054357051849365, + "cpu_usage": 43.4, + "cpu_core_utilization": [ + 71.9, + 31.3, + 37.4, + 33.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_054.mp4": { + "response": "The video captures a soccer match between two teams, with the blue team wearing blue shirts and the yellow team wearing yellow shirts. The blue team scores a goal, and the players celebrate with each other. The video also shows a close-up of the soccer ball, highlighting its movement and the players' reactions to it. The video is shot in a fast-paced manner, with the players running around the field and the ball moving quickly. The video ends with the players hugging each other, indicating a successful goal for the blue team.", + "tokens_per_second": 15.079603719843547, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 106, + "inference_time": 13.948255777359009, + "cpu_usage": 53.3, + "cpu_core_utilization": [ + 41.2, + 42.7, + 90.3, + 39.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_055.mp4": { + "response": "The video captures a soccer match between two teams, with the yellow team wearing green shirts and the blue team wearing white shirts. The yellow team scores a goal, and the players celebrate. The video also shows a replay of the goal, highlighting the player's skill and the impact of the goal on the game. 
The video is shot in a stadium with a large crowd watching the game.", + "tokens_per_second": 14.890565837651767, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 77, + "inference_time": 12.888222455978394, + "cpu_usage": 58.3, + "cpu_core_utilization": [ + 94.6, + 46.2, + 44.9, + 47.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_056.mp4": { + "response": "The video captures a soccer match between Argentina and Real Madrid. The players are seen running around the field, with one player wearing a blue jersey and white shorts. The video also shows a player from Argentina scoring a goal, and the players from Argentina and Real Madrid celebrating. The video also shows a replay of the goal, with the players from Argentina and Real Madrid celebrating again. The video also shows a replay of the goal, with the players from Argentina and Real Madrid celebrating again. The video also shows a replay of the goal, with the players from Argentina and Real Madrid celebrating again. The video also shows a replay of the goal, with the players from Argentina and Real Madrid celebrating again. The video also shows a replay of the goal, with the players from Argentina and Real Madrid celebrating again. The video also shows a replay of the goal, with the players from Argentina and Real Madrid celebrating again. The video also shows a replay of the goal, with the players from Argentina and Real Madrid celebrating again. The video also shows a replay of the goal, with the players from Argentina and Real Madrid celebrating again. The video also shows a replay of the goal, with the players from Argentina and Real Madrid celebrating again. 
The video also shows a replay of the goal, with the players", + "tokens_per_second": 15.160021425103128, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 23.86567187309265, + "cpu_usage": 43.2, + "cpu_core_utilization": [ + 44.4, + 33.6, + 36.8, + 57.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_057.mp4": { + "response": "The video captures a soccer match between two teams, with the players wearing white and blue uniforms. The game is played on a green field, and the players are seen running around the field, passing the ball to each other. The video also shows a close-up of a soccer ball being kicked by a player, and a player wearing a white shirt is seen running towards the goal. The video also shows a shot of a soccer player wearing a red shirt, and a player wearing a blue shirt is seen running towards the goal. The video also shows a shot of a soccer player wearing a white shirt, and a player wearing a red shirt is seen running towards the goal. The video also shows a shot of a soccer player wearing a blue shirt, and a player wearing a white shirt is seen running towards the goal. The video also shows a shot of a soccer player wearing a red shirt, and a player wearing a white shirt is seen running towards the goal. The video also shows a shot of a soccer player wearing a blue shirt, and a player wearing a white shirt is seen running towards the goal. The video also shows a shot of a soccer player wearing a red shirt, and a player wearing a white shirt is seen running towards the goal. 
The video also shows a shot", + "tokens_per_second": 15.034732126920906, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.185829162597656, + "cpu_usage": 43.5, + "cpu_core_utilization": [ + 28.4, + 28.0, + 29.3, + 88.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_058.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a player from the blue team who is seen kicking the ball into the goal. The player is wearing a white jersey with the number 10 on it. The match is taking place on a green field, and the audience can be seen watching the game from the stands. The video also shows a replay of the same moment, highlighting the player's skillful kick. The video is a great example of a soccer match, with the player's skillful kick being the main focus.", + "tokens_per_second": 15.074772750512258, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 111, + "inference_time": 14.79603123664856, + "cpu_usage": 54.2, + "cpu_core_utilization": [ + 38.8, + 59.8, + 44.2, + 74.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_059.mp4": { + "response": "The video captures a soccer match between Argentina and Nigeria. It features a player from Argentina scoring a goal, which is followed by a replay of the same moment. The video also shows other moments from the match, including a player from Nigeria scoring a goal and a replay of that moment. 
The video ends with a replay of the goal from Argentina.", + "tokens_per_second": 15.142981091351468, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 69, + "inference_time": 12.087930679321289, + "cpu_usage": 60.1, + "cpu_core_utilization": [ + 65.5, + 47.9, + 76.2, + 50.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_060.mp4": { + "response": "The video captures a soccer match between two teams, with the yellow team wearing yellow shirts and the blue team wearing blue shirts. The yellow team scores a goal, and the players celebrate by jumping and clapping. The video also shows a replay of the goal, with the players and the referee standing around the goal. The video ends with the yellow team celebrating their goal.", + "tokens_per_second": 15.20735275491903, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 74, + "inference_time": 12.052000999450684, + "cpu_usage": 57.3, + "cpu_core_utilization": [ + 69.3, + 62.2, + 49.5, + 48.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_061.mp4": { + "response": "The video captures a soccer match between two teams, with the main focus on a player scoring a goal. The player is seen running towards the goal, while the opposing team tries to block the shot. The video also shows a replay of the goal, highlighting the player's skill and the impact of the shot. The video also features a replay of a different match, with the same player scoring a goal. The video is shot in a stadium, with the players and the audience visible in the background. 
The video is shot in 4k resolution and is available in 1080p and 720p.", + "tokens_per_second": 15.116573969074487, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 126, + "inference_time": 15.45172905921936, + "cpu_usage": 50.9, + "cpu_core_utilization": [ + 48.3, + 45.5, + 75.4, + 34.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_062.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a player from one team scoring a goal. The player is seen running towards the goal, while the opposing team tries to block the shot. The video also shows other moments from the game, including a player from the opposing team celebrating a goal and a player from the same team celebrating a goal. The video is shot from a high angle, providing a bird's eye view of the game.", + "tokens_per_second": 15.150096909336943, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 92, + "inference_time": 13.583905458450317, + "cpu_usage": 54.8, + "cpu_core_utilization": [ + 46.6, + 47.8, + 62.0, + 62.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_063.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a player wearing a blue and red jersey. The player is seen running towards the goal, and a goal is scored. The player celebrates the goal with his teammates, and the video ends with the players still celebrating.", + "tokens_per_second": 15.141533992515093, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 57, + "inference_time": 11.331657648086548, + "cpu_usage": 62.7, + "cpu_core_utilization": [ + 48.2, + 55.8, + 96.4, + 50.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_064.mp4": { + "response": "The video captures a soccer match between two teams, with the ball being kicked into the net by a player from one team. 
The players from the opposing team celebrate the goal, and the video shows several more goals being scored by the same team. The video also shows a player from the opposing team running towards the goal, and the ball is kicked into the net by a player from the same team. The video ends with a shot of the players from the opposing team celebrating the goal.", + "tokens_per_second": 15.096657909178079, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 97, + "inference_time": 13.59653615951538, + "cpu_usage": 53.9, + "cpu_core_utilization": [ + 37.1, + 42.5, + 95.9, + 40.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_065.mp4": { + "response": "The video captures a soccer match between Argentina and Iran. The players are seen running around the field, with one player wearing a red jersey and the other wearing a white jersey. The video also shows a close-up of the goal, with the audience in the stands watching the game. The video ends with a shot of the players celebrating, with one player wearing a white jersey and the other wearing a red jersey.", + "tokens_per_second": 14.940171103532032, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 82, + "inference_time": 12.342964172363281, + "cpu_usage": 56.1, + "cpu_core_utilization": [ + 49.2, + 86.1, + 48.6, + 40.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_066.mp4": { + "response": "The video captures a soccer match between Argentina and Iran. The players are seen running around the field, with one player wearing a yellow jersey scoring a goal. The video also shows a replay of the goal, highlighting the player's skill and the impact of the shot. The video also features a shot of a soccer player wearing a yellow jersey, which is likely the player who scored the goal. 
The video also shows a replay of the goal, highlighting the player's skill and the impact of the shot.", + "tokens_per_second": 14.904040429165455, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 100, + "inference_time": 13.780113220214844, + "cpu_usage": 55.9, + "cpu_core_utilization": [ + 49.7, + 49.8, + 81.6, + 42.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_067.mp4": { + "response": "The video captures a soccer match between two teams, with the main focus on a player wearing a red jersey. The player is seen dribbling the ball and making a pass to a teammate, who then scores a goal. The video also shows other players on the field, including a player in a white jersey who is seen running towards the goal. The video appears to be a highlight reel of a soccer game, showcasing the player's skills and the team's performance.", + "tokens_per_second": 14.830904988362118, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 93, + "inference_time": 12.951255321502686, + "cpu_usage": 52.7, + "cpu_core_utilization": [ + 40.7, + 36.3, + 37.9, + 95.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_068.mp4": { + "response": "The video captures a soccer match between two teams, with the players wearing red and white uniforms. The game is played on a green field, and the players are seen running around the field, passing the ball, and trying to score. The video also shows a close-up of a player scoring a goal, and the players celebrating. The video is shot in slow motion, highlighting the action and the players' movements. The video is shot in a way that captures the excitement and energy of the game, with the players running around the field and the ball being passed to each other. 
The video is shot in a way that highlights the players' movements and the game's excitement, with the slow motion effect emphasizing the action and the players' movements.", + "tokens_per_second": 15.1336533103342, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 149, + "inference_time": 16.86223816871643, + "cpu_usage": 48.5, + "cpu_core_utilization": [ + 38.2, + 52.1, + 34.5, + 69.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_069.mp4": { + "response": "The video captures a soccer match between two teams, with the players running around the field and the ball being kicked. The players are wearing blue and white uniforms, and the ball is being kicked by a player in a blue uniform. The video also shows a close-up of the ball being kicked, and the players are running around the field. The video is shot in slow motion, and the players are seen running around the field. The video is shot in a stadium, and the players are seen running around the field. The video is shot in a stadium, and the players are seen running around the field. The video is shot in a stadium, and the players are seen running around the field. The video is shot in a stadium, and the players are seen running around the field. The video is shot in a stadium, and the players are seen running around the field. The video is shot in a stadium, and the players are seen running around the field. The video is shot in a stadium, and the players are seen running around the field. The video is shot in a stadium, and the players are seen running around the field. The video is shot in a stadium, and the players are seen running around the field. 
The video is shot in a stadium,", + "tokens_per_second": 15.159161023592182, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.81626558303833, + "cpu_usage": 45.5, + "cpu_core_utilization": [ + 65.6, + 35.5, + 27.1, + 53.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_070.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a player wearing a red jersey. The player is seen dribbling the ball and making a pass, while the opposing team tries to block him. The video also shows a replay of the same moment, highlighting the player's skill and the intensity of the game. The video is shot from a high angle, providing a bird's eye view of the action on the field. The video is shot in 4k resolution, providing a high-quality viewing experience. The video is a great example of the skill and athleticism required in soccer, and the replay highlights the player's ability to make a pass and the intensity of the game.", + "tokens_per_second": 15.19258322661976, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 139, + "inference_time": 16.96237540245056, + "cpu_usage": 51.3, + "cpu_core_utilization": [ + 77.7, + 39.2, + 52.5, + 35.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_071.mp4": { + "response": "The video captures a soccer match between two teams, with the players in blue jerseys and yellow shirts. The players are seen running around the field, with one player in blue holding a toy. The video also shows a close-up of a player's face, and a replay of the same scene. 
The video ends with a call to subscribe to the channel.", + "tokens_per_second": 15.094968885806995, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 71, + "inference_time": 11.335002899169922, + "cpu_usage": 56.8, + "cpu_core_utilization": [ + 90.7, + 41.6, + 48.1, + 47.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_072.mp4": { + "response": "The video captures a soccer match between Argentina and Bosnia-Herzegovina. The game is played on a green field, and the players are seen running around the field, passing the ball, and trying to score. The video also shows a close-up of the ball hitting the goal, indicating a successful goal by the Argentine team. The video is a fast-paced display of soccer action, with players running around the field and trying to score.", + "tokens_per_second": 15.089671871346129, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 89, + "inference_time": 12.458594799041748, + "cpu_usage": 53.7, + "cpu_core_utilization": [ + 41.5, + 38.1, + 39.3, + 95.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_073.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a player's skillful play. The player is seen dribbling the ball with precision, showcasing his control and agility. The player's movements are fluid, and he appears to be in control of the ball, indicating his ability to handle it effectively. The player's movements are highlighted by the camera, which captures his movements in detail, allowing the viewer to appreciate the player's skill and technique. The video also features other moments of the match, including a goal scored by the opposing team, but the main focus remains on the player's dribbling skills. 
Overall, the video showcases the player's ability to control the ball and his ability to make goals, highlighting his skill and technique in the game.", + "tokens_per_second": 15.166777613396597, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 154, + "inference_time": 16.8999662399292, + "cpu_usage": 48.3, + "cpu_core_utilization": [ + 68.4, + 35.7, + 57.5, + 31.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_074.mp4": { + "response": "The video captures a soccer match between Argentina and Mexico. The players are seen running around the field, with one player wearing a blue jersey scoring a goal. The video also shows a replay of the goal, highlighting the player's skill and the impact of the goal on the game. The video also features a replay of a different soccer match, but the focus remains on the goal scored by the Argentine player.", + "tokens_per_second": 15.050789376653178, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 81, + "inference_time": 12.29308295249939, + "cpu_usage": 56.3, + "cpu_core_utilization": [ + 44.5, + 50.7, + 42.0, + 87.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_075.mp4": { + "response": "The video captures a soccer match between two teams, with the players wearing blue and white uniforms. The players are seen running around the field, with one player wearing a yellow jersey. The video also shows a close-up of the soccer ball, with the players' movements and the ball's trajectory. 
The video ends with a shot of the players celebrating, with the camera zooming in on their faces.", + "tokens_per_second": 15.095610978597701, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 80, + "inference_time": 11.829913139343262, + "cpu_usage": 54.9, + "cpu_core_utilization": [ + 47.2, + 56.5, + 45.0, + 70.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_076.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular player. The player is seen dribbling the ball and making a powerful shot, which is followed by a celebration from the players. The video also shows other moments from the same match, including a player being hit by a ball and a player being hit by a ball. The video is a highlight reel of the match, showcasing the player's skills and the excitement of the game.", + "tokens_per_second": 15.096457780335871, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 92, + "inference_time": 12.638344526290894, + "cpu_usage": 53.5, + "cpu_core_utilization": [ + 41.6, + 38.1, + 39.4, + 94.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_077.mp4": { + "response": "The video captures a soccer match between two teams, with the white team wearing white jerseys and the black team wearing black jerseys. The white team scores a goal, and the black team tries to defend it. The video also shows a replay of the goal, highlighting the player who scored it. The video also shows the white team scoring another goal, and the black team trying to defend it. 
The video ends with a replay of the second goal.", + "tokens_per_second": 15.115430955512632, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 89, + "inference_time": 13.076369047164917, + "cpu_usage": 55.8, + "cpu_core_utilization": [ + 45.9, + 46.4, + 91.0, + 39.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_078.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a player wearing a red and blue uniform. The player is seen dribbling the ball and making a shot on goal, which is blocked by the goalkeeper. The video also shows the player celebrating after scoring a goal, with the crowd in the background. The video is a highlight of a soccer match, showcasing the player's skill and the intensity of the game.", + "tokens_per_second": 15.155702459448817, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 86, + "inference_time": 12.564993381500244, + "cpu_usage": 55.1, + "cpu_core_utilization": [ + 74.5, + 39.8, + 61.1, + 45.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_079.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a player wearing a green and white jersey. The player is seen running towards the goal, and the camera follows his movements as he approaches the goal. The player is then seen celebrating with teammates, and the camera captures the moment. 
The video showcases the excitement and energy of a soccer match, with players running towards the goal and celebrating their success.", + "tokens_per_second": 15.112858391134807, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 83, + "inference_time": 13.396330118179321, + "cpu_usage": 58.8, + "cpu_core_utilization": [ + 49.9, + 45.3, + 90.4, + 49.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_080.mp4": { + "response": "The video captures a soccer match between two teams, with the main focus on a player wearing a blue jersey. The player is seen dribbling the ball and making a pass to a teammate, who then scores a goal. The video also shows other moments from the game, including a player scoring a goal, a player running with the ball, and a player scoring a goal again. The video is shot in slow motion, highlighting the action and movement of the players. The video also features a shot of the audience cheering, adding to the excitement of the game. Overall, the video showcases the skill and athleticism of the players, as well as the excitement of the crowd.", + "tokens_per_second": 15.170065726896636, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 134, + "inference_time": 16.34519863128662, + "cpu_usage": 51.3, + "cpu_core_utilization": [ + 80.9, + 52.4, + 34.9, + 36.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_081.mp4": { + "response": "The video captures a soccer match between two teams, with the yellow team wearing blue shirts and the red team wearing yellow shirts. The yellow team scores a goal, and the players celebrate. The video also shows a replay of the goal, with the yellow team players running towards the goal. The video also features a replay of a different soccer match, with the red team wearing blue shirts and the yellow team wearing yellow shirts. 
The video also features a replay of a different soccer match, with the red team wearing blue shirts and the yellow team wearing yellow shirts. The video also features a replay of a different soccer match, with the red team wearing blue shirts and the yellow team wearing yellow shirts. The video also features a replay of a different soccer match, with the red team wearing blue shirts and the yellow team wearing yellow shirts. The video also features a replay of a different soccer match, with the red team wearing blue shirts and the yellow team wearing yellow shirts. The video also features a replay of a different soccer match, with the red team wearing blue shirts and the yellow team wearing yellow shirts. The video also features a replay of a different soccer match, with the red team wearing blue shirts and the yellow team wearing yellow shirts. The video also features a replay of a different soccer", + "tokens_per_second": 15.067583259819104, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 23.755812883377075, + "cpu_usage": 43.1, + "cpu_core_utilization": [ + 22.7, + 28.1, + 39.8, + 81.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_082.mp4": { + "response": "The video captures a soccer match between Argentina and Brazil. The players are seen running around the field, with one player kicking the ball towards the goal. The crowd is cheering, and the players are seen celebrating their goal. 
The video also shows a different soccer match between Argentina and Getafe, with the players running around the field and celebrating their goal.", + "tokens_per_second": 15.231033552960797, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 71, + "inference_time": 11.35098934173584, + "cpu_usage": 56.9, + "cpu_core_utilization": [ + 49.3, + 42.9, + 88.6, + 46.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_083.mp4": { + "response": "The video captures a soccer match between two teams, with the blue team wearing yellow and red jerseys and the red team wearing blue and yellow jerseys. The blue team scores a goal, and the red team celebrates. The video also shows a replay of the goal, with the blue team's goalkeeper seen in a close-up shot. The video also features a replay of a different soccer match, with the red team wearing blue and yellow jerseys and the blue team wearing yellow and red jerseys. The video also features a replay of a different soccer match, with the red team wearing blue and yellow jerseys and the blue team wearing yellow and red jerseys. The video also features a replay of a different soccer match, with the red team wearing blue and yellow jerseys and the blue team wearing yellow and red jerseys. The video also features a replay of a different soccer match, with the red team wearing blue and yellow jerseys and the blue team wearing yellow and red jerseys. The video also features a replay of a different soccer match, with the red team wearing blue and yellow jerseys and the blue team wearing yellow and red jerseys. The video also features a replay of a different soccer match, with the red team wearing blue and yellow jerseys and the blue team wearing yellow and red jerseys. 
The video also features a", + "tokens_per_second": 15.208403584063404, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 23.628050565719604, + "cpu_usage": 43.2, + "cpu_core_utilization": [ + 62.4, + 38.7, + 45.7, + 25.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_084.mp4": { + "response": "The video captures a soccer match between Barcelona and Liverpool. It shows a player from Barcelona scoring a goal, while the goalkeeper from Liverpool is seen in the background. The video also features a replay of the goal, with the ball hitting the net and the goalkeeper falling to the ground. The video is shot in a stadium with a large crowd watching the game.", + "tokens_per_second": 15.128428649096634, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 71, + "inference_time": 11.763778924942017, + "cpu_usage": 57.5, + "cpu_core_utilization": [ + 45.1, + 71.3, + 47.2, + 66.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_085.mp4": { + "response": "The video captures a soccer match between two teams, with the main focus on the player wearing a blue jersey. The player is seen running towards the goal, while the opposing team tries to block his path. The video also shows a player from the opposing team attempting to score, but the ball is blocked by the goalkeeper. The video ends with the player from the blue team celebrating, while the opposing team looks disappointed.", + "tokens_per_second": 15.09179881903882, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 83, + "inference_time": 12.981280326843262, + "cpu_usage": 57.2, + "cpu_core_utilization": [ + 95.5, + 44.2, + 40.6, + 48.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_086.mp4": { + "response": "The video captures a soccer match between two teams, with the first team wearing blue and white jerseys and the second team wearing yellow and white jerseys. 
The first team scores a goal, and the video shows the replay of that goal. The video also shows the first team scoring another goal, and the replay of that goal. The video also shows the first team scoring a third goal, and the replay of that goal. The video also shows the first team scoring a fourth goal, and the replay of that goal. The video also shows the first team scoring a fifth goal, and the replay of that goal. The video also shows the first team scoring a sixth goal, and the replay of that goal. The video also shows the first team scoring a seventh goal, and the replay of that goal. The video also shows the first team scoring a eighth goal, and the replay of that goal. The video also shows the first team scoring a ninth goal, and the replay of that goal. The video also shows the first team scoring a tenth goal, and the replay of that goal. The video also shows the first team scoring a eleventh goal, and the replay of that goal. The video also shows the first team scoring a twelfth goal, and the replay of that goal.", + "tokens_per_second": 15.243661062498152, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.256847858428955, + "cpu_usage": 43.6, + "cpu_core_utilization": [ + 32.2, + 26.5, + 86.3, + 29.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_087.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular player. The player is seen dribbling the ball and making a goal, while the opposing team tries to defend the goal. The video also shows other moments from the match, including a player scoring a goal and a player falling down. 
The video is shot in slow motion, highlighting the player's skill and the intensity of the game.", + "tokens_per_second": 15.018603405888955, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 83, + "inference_time": 13.33133602142334, + "cpu_usage": 58.0, + "cpu_core_utilization": [ + 47.7, + 69.1, + 47.8, + 67.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_088.mp4": { + "response": "The video captures a soccer match between two teams, with the blue team wearing blue jerseys and the white team wearing white jerseys. The blue team scores a goal, and the players celebrate by jumping and clapping. The video also shows the audience cheering and the players celebrating their goal. The video is shot in slow motion, highlighting the excitement and energy of the game.", + "tokens_per_second": 15.050961440857408, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 73, + "inference_time": 12.163671731948853, + "cpu_usage": 58.7, + "cpu_core_utilization": [ + 47.3, + 73.4, + 60.5, + 53.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_089.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a player wearing a yellow jersey. The player is seen running towards the goal, and the ball is in the air, indicating that the player is about to make a shot. The video also shows the player's movements and the surrounding players, providing a detailed view of the game.", + "tokens_per_second": 15.072998222410463, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 70, + "inference_time": 11.240507125854492, + "cpu_usage": 56.2, + "cpu_core_utilization": [ + 62.1, + 46.8, + 74.6, + 41.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_090.mp4": { + "response": "The video captures a soccer match between two teams, with the audience in the stands cheering and the players actively engaged in the game. 
The players are seen running around the field, with one player wearing a yellow jersey and another wearing a red jersey. The video also shows a goal being scored, with the players celebrating and the audience cheering. The video is shot in a close-up view of the players, highlighting their movements and the excitement of the match.", + "tokens_per_second": 15.126399424127708, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 90, + "inference_time": 13.312969446182251, + "cpu_usage": 55.6, + "cpu_core_utilization": [ + 40.0, + 39.7, + 95.5, + 47.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_091.mp4": { + "response": "The video captures a thrilling moment in a soccer match between two teams. The players are seen running around the field, with one player in a green jersey making a powerful shot on the goal. The ball is deflected by the goalkeeper, and the players celebrate their goal, with one player jumping in the air and another holding the ball. The video also shows the players from both teams celebrating their goal, with one player jumping in the air and another holding the ball. The video ends with the players from both teams celebrating their goal, with one player jumping in the air and another holding the ball.", + "tokens_per_second": 14.93804804390747, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 119, + "inference_time": 14.680425882339478, + "cpu_usage": 50.5, + "cpu_core_utilization": [ + 44.0, + 60.5, + 34.3, + 63.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_092.mp4": { + "response": "The video captures a soccer match between two teams, with the audience in the stands cheering and the players running around the field. The players are wearing green and blue uniforms, and the game is intense, with players running and kicking the ball. 
The video ends with a message of appreciation for the viewers.", + "tokens_per_second": 15.067549387193225, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 60, + "inference_time": 9.988833904266357, + "cpu_usage": 54.1, + "cpu_core_utilization": [ + 45.6, + 75.5, + 38.9, + 56.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_093.mp4": { + "response": "The video features a black and white logo with a play button, which is likely a YouTube logo. The logo is displayed on a black background. The video content is not visible in the given information.", + "tokens_per_second": 15.029368119821319, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 40, + "inference_time": 6.936399459838867, + "cpu_usage": 51.0, + "cpu_core_utilization": [ + 34.1, + 73.9, + 41.2, + 54.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_094.mp4": { + "response": "The video features a black background with a large, bold letter \"W\" in the center. The letter \"W\" is repeated twice, creating a rhythmic pattern. The letter \"W\" is in a dark color, which contrasts sharply with the black background. The video appears to be a simple, minimalist piece of art.", + "tokens_per_second": 15.14670944729439, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 66, + "inference_time": 8.353766679763794, + "cpu_usage": 45.1, + "cpu_core_utilization": [ + 46.0, + 56.7, + 46.2, + 31.6 + ] + } +} \ No newline at end of file diff --git a/API_Transformers/outputs/qwen/qwen_60s.json b/API_Transformers/outputs/qwen/qwen_60s.json new file mode 100644 index 0000000000000000000000000000000000000000..ec2d83d65b65a4eacdbc3ee466ec060bbb740ae7 --- /dev/null +++ b/API_Transformers/outputs/qwen/qwen_60s.json @@ -0,0 +1,660 @@ +{ + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_001.mp4": { + "response": "The video captures a soccer match between two teams, with the main focus on the player wearing a blue and yellow uniform. 
The player is seen running towards the goal, and the camera follows his movements as he scores a goal. The video also shows other moments from the match, including a player in a yellow shirt running towards the goal and a player in a white shirt running towards the goal. The video also features a shot of a player in a blue and yellow uniform running towards the goal, and a shot of a player in a white shirt running towards the goal. The video also features a shot of a player in a blue and yellow uniform running towards the goal, and a shot of a player in a white shirt running towards the goal. The video also features a shot of a player in a blue and yellow uniform running towards the goal, and a shot of a player in a white shirt running towards the goal. The video also features a shot of a player in a blue and yellow uniform running towards the goal, and a shot of a player in a white shirt running towards the goal. The video also features a shot of a player in a blue and yellow uniform running towards the goal, and a shot of a player in a white shirt running towards the goal. The video also", + "tokens_per_second": 13.40059383147904, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 22.90508794784546, + "cpu_usage": 39.6, + "cpu_core_utilization": [ + 45.6, + 27.3, + 30.7, + 54.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_002.mp4": { + "response": "The video captures a soccer match between two teams, with the yellow team scoring multiple goals. The players are seen running around the field, with the yellow team scoring multiple goals. The video also shows a close-up of the soccer ball, with the yellow team scoring multiple goals. 
The video ends with the yellow team celebrating their victory.", + "tokens_per_second": 10.05763431796692, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 66, + "inference_time": 12.750664234161377, + "cpu_usage": 58.9, + "cpu_core_utilization": [ + 57.3, + 49.5, + 77.4, + 51.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_003.mp4": { + "response": "The video captures a soccer match between two teams, with the goal being scored by one of the players. The players are seen celebrating their goal, and the video also shows other moments from the game, including a player scoring a goal and a player being hit by a ball. The video also shows the players celebrating their goal, and the video ends with the players celebrating their goal.", + "tokens_per_second": 10.362352898648883, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 76, + "inference_time": 12.628535985946655, + "cpu_usage": 55.6, + "cpu_core_utilization": [ + 45.3, + 41.0, + 40.3, + 95.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_004.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on the goal. The players are seen running around the field, with one player wearing a blue jersey jumping and celebrating. The video also shows a replay of the goal, with the same player jumping and celebrating again. The video also features a replay of a different goal, with the same player jumping and celebrating again. The video also shows a replay of a different goal, with the same player jumping and celebrating again. The video also shows a replay of a different goal, with the same player jumping and celebrating again. The video also shows a replay of a different goal, with the same player jumping and celebrating again. The video also shows a replay of a different goal, with the same player jumping and celebrating again. 
The video also shows a replay of a different goal, with the same player jumping and celebrating again. The video also shows a replay of a different goal, with the same player jumping and celebrating again. The video also shows a replay of a different goal, with the same player jumping and celebrating again. The video also shows a replay of a different goal, with the same player jumping and celebrating again. The video also shows a replay of a different goal, with the same player jumping and celebrating again.", + "tokens_per_second": 13.296233255859406, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.65294337272644, + "cpu_usage": 42.4, + "cpu_core_utilization": [ + 26.8, + 21.9, + 30.7, + 90.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_005.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular player's performance. The player is seen dribbling the ball and making a powerful shot, which is followed by a celebration from the players. The video also shows other moments from the match, including a player scoring a goal and a player being congratulated by teammates. The video is shot in slow motion, highlighting the player's skill and the excitement of the moment.", + "tokens_per_second": 10.711200379469911, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 89, + "inference_time": 13.55346417427063, + "cpu_usage": 55.0, + "cpu_core_utilization": [ + 41.9, + 41.9, + 43.0, + 92.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_006.mp4": { + "response": "The video captures a soccer match between two teams, with the main focus on the goal. The players are seen running around the field, with one player scoring a goal and celebrating with teammates. The video also shows other moments from the game, including a player scoring a goal and a player being hit by a ball. 
The video is shot in slow motion, highlighting the action and movement of the players.", + "tokens_per_second": 10.554908172002714, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 80, + "inference_time": 14.000629186630249, + "cpu_usage": 58.1, + "cpu_core_utilization": [ + 53.2, + 60.0, + 44.6, + 74.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_007.mp4": { + "response": "The video captures a soccer match between two teams, with the yellow team scoring a goal. The players are seen running around the field, with the yellow team celebrating their goal. The video also shows a replay of the goal, highlighting the player's skill and the impact of the goal on the game.", + "tokens_per_second": 8.628679137685621, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 60, + "inference_time": 12.558310985565186, + "cpu_usage": 74.4, + "cpu_core_utilization": [ + 67.5, + 68.4, + 67.8, + 94.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_008.mp4": { + "response": "The video captures a soccer match between two teams, with the ball moving around the field. The players are wearing different colored jerseys, and the audience is visible in the background. The video also shows a replay of a goal scored by one of the players.", + "tokens_per_second": 9.15227998451498, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 51, + "inference_time": 11.113506555557251, + "cpu_usage": 60.6, + "cpu_core_utilization": [ + 65.4, + 50.8, + 73.7, + 52.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_009.mp4": { + "response": "The video captures a soccer match between two teams, with the players running around the field and the ball being passed to each other. The players are wearing green and white uniforms, and the game is being played in a stadium with a crowd in the stands. 
The video also shows a replay of a goal scored by one of the players, and the ball is shown in slow motion.", + "tokens_per_second": 10.469246141743787, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 76, + "inference_time": 12.908668756484985, + "cpu_usage": 56.9, + "cpu_core_utilization": [ + 51.9, + 50.0, + 82.6, + 43.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_010.mp4": { + "response": "The video captures a soccer match between two teams, with the players wearing green and white uniforms. The players are seen running around the field, with one player wearing a blue jersey. The video also shows a close-up of a soccer ball, and the players are seen running towards the goal. The video also shows a shot of a soccer player with a blue jersey, and the players are seen running towards the goal. The video also shows a shot of a soccer player with a blue jersey, and the players are seen running towards the goal. The video also shows a shot of a soccer player with a blue jersey, and the players are seen running towards the goal. The video also shows a shot of a soccer player with a blue jersey, and the players are seen running towards the goal. The video also shows a shot of a soccer player with a blue jersey, and the players are seen running towards the goal. The video also shows a shot of a soccer player with a blue jersey, and the players are seen running towards the goal. The video also shows a shot of a soccer player with a blue jersey, and the players are seen running towards the goal. 
The video also shows a shot of a soccer player with a blue jersey, and the players are seen running towards the", + "tokens_per_second": 13.449126642449913, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.426010847091675, + "cpu_usage": 42.5, + "cpu_core_utilization": [ + 33.7, + 26.9, + 85.1, + 24.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_011.mp4": { + "response": "The video captures a soccer match between two teams, with the players wearing white and red uniforms. The match is played on a green field, and the players are seen running around the field, trying to score a goal. The video also shows a close-up of a player's face, and a shot of a player's head. The video ends with a shot of a player's face, and a shot of a player's head.", + "tokens_per_second": 10.74788200235932, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 87, + "inference_time": 13.56297779083252, + "cpu_usage": 54.7, + "cpu_core_utilization": [ + 41.7, + 38.4, + 43.0, + 95.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_012.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular moment. The players are seen running around the field, with one player wearing a green jersey and a white shirt. The video shows the player scoring a goal, with the ball flying into the net and the player celebrating. The video also shows other moments from the match, including a player wearing a red jersey and a white shirt running towards the goal. 
The video ends with the players celebrating, with the ball flying into the net and the player wearing a green jersey and a white shirt running towards the goal.", + "tokens_per_second": 11.649960463588755, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 117, + "inference_time": 15.866786241531372, + "cpu_usage": 51.9, + "cpu_core_utilization": [ + 50.2, + 61.0, + 37.1, + 59.2 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_013.mp4": { + "response": "The video captures a soccer match between Argentina and Colombia. The players are seen running around the field, with one player wearing a yellow jersey scoring a goal. The video also shows other scenes from different soccer matches, including a player in a yellow jersey celebrating a goal, and a player in a blue jersey running towards the goal. The video also shows a player in a yellow jersey running towards the goal, and a player in a blue jersey running towards the goal. The video also shows a player in a yellow jersey running towards the goal, and a player in a blue jersey running towards the goal. The video also shows a player in a yellow jersey running towards the goal, and a player in a blue jersey running towards the goal. The video also shows a player in a yellow jersey running towards the goal, and a player in a blue jersey running towards the goal. The video also shows a player in a yellow jersey running towards the goal, and a player in a blue jersey running towards the goal. The video also shows a player in a yellow jersey running towards the goal, and a player in a blue jersey running towards the goal. The video also shows a player in a yellow jersey running towards the goal, and a player in a blue jersey running towards the goal. 
The video", + "tokens_per_second": 13.434648062904184, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.372610569000244, + "cpu_usage": 43.2, + "cpu_core_utilization": [ + 36.8, + 35.2, + 67.0, + 33.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_014.mp4": { + "response": "The video captures a soccer match between Racing Santander and Racing M\u00e1laga. The match begins with a player from Racing Santander scoring a goal, and the video shows the replay of that goal. The video also shows other moments from the match, including a player from Racing M\u00e1laga scoring a goal, and a replay of that goal. The video also shows the players celebrating the goals, and the replay of the celebration. The video also shows the players running around the field, and the replay of that running. The video also shows the players hugging each other, and the replay of that hugging. The video also shows the players running towards the goal, and the replay of that running. The video also shows the players celebrating the goals, and the replay of that celebration. The video also shows the players running towards the goal, and the replay of that running. The video also shows the players celebrating the goals, and the replay of that celebration. The video also shows the players running towards the goal, and the replay of that running. The video also shows the players celebrating the goals, and the replay of that celebration. The video also shows the players running towards the goal, and the replay of that running. 
The video also shows the players celebrating the goals,", + "tokens_per_second": 13.479950074027617, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.027027130126953, + "cpu_usage": 42.7, + "cpu_core_utilization": [ + 48.2, + 24.3, + 70.2, + 28.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_015.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular player. The player is seen running down the field, passing the ball to his teammate, who then scores a goal. The video also shows other moments from the game, including a player hugging another player, and a player running with the ball. The video is shot in slow motion, highlighting the action and movement of the players. The video is shot in a stadium, with the audience visible in the background. The video is shot in 4k resolution, providing a high-quality viewing experience.", + "tokens_per_second": 11.640350568059489, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 116, + "inference_time": 15.755470037460327, + "cpu_usage": 52.7, + "cpu_core_utilization": [ + 42.0, + 34.5, + 36.8, + 97.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_016.mp4": { + "response": "The video captures a soccer match between two teams, with the ball being kicked into the goal by a player. The video shows several shots of the match, including a close-up of the ball entering the goal and a shot of the player celebrating. The video also shows the ball being kicked by the opposing team and the ball being blocked by the goalkeeper. 
The video is a fast-paced display of soccer action, with the ball being kicked and the players running around the field.", + "tokens_per_second": 11.043868658200097, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 94, + "inference_time": 14.13460636138916, + "cpu_usage": 54.7, + "cpu_core_utilization": [ + 43.8, + 72.0, + 41.7, + 61.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_017.mp4": { + "response": "The video captures a soccer match between two teams, with the ball being kicked into the goal by one of the players. The players are seen celebrating their goal, and the audience is watching the game. The video also shows other soccer games being played, with the ball being kicked into the goal by the players. The video is shot in a way that captures the excitement and energy of the soccer game.", + "tokens_per_second": 10.51133078300418, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 80, + "inference_time": 13.23256540298462, + "cpu_usage": 54.4, + "cpu_core_utilization": [ + 44.0, + 48.0, + 42.7, + 82.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_018.mp4": { + "response": "The video captures a soccer match between two teams, with the players wearing red and white uniforms. The players are seen running around the field, passing the ball to each other, and trying to score goals. The video also shows a close-up of a player's face, and the audience is seen cheering on the players. 
The video is a fast-paced display of soccer gameplay, with the players running around the field and passing the ball to each other.", + "tokens_per_second": 10.98742027777536, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 90, + "inference_time": 13.309138298034668, + "cpu_usage": 53.7, + "cpu_core_utilization": [ + 42.9, + 47.3, + 39.1, + 85.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_019.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on the goal scored by one of the players. The players are seen running around the field, with the ball being passed to a player who scores a goal. The video also shows other moments from the game, including a player scoring a goal and a player being congratulated by teammates. The video is shot from an aerial perspective, providing a bird's eye view of the game.", + "tokens_per_second": 10.921069856446843, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 89, + "inference_time": 13.78211498260498, + "cpu_usage": 54.9, + "cpu_core_utilization": [ + 90.6, + 49.1, + 41.1, + 38.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_020.mp4": { + "response": "The video captures a soccer match between two teams, with the players running around the field and the ball being kicked into the goal. The players are wearing different colored uniforms, and the ball is being kicked by a player wearing a blue shirt. The video also shows a replay of the goal, with the ball being kicked into the goal and the players celebrating. The video also shows a replay of the previous goal, with the ball being kicked into the goal and the players celebrating. The video also shows a replay of the previous goal, with the ball being kicked into the goal and the players celebrating. The video also shows a replay of the previous goal, with the ball being kicked into the goal and the players celebrating. 
The video also shows a replay of the previous goal, with the ball being kicked into the goal and the players celebrating. The video also shows a replay of the previous goal, with the ball being kicked into the goal and the players celebrating. The video also shows a replay of the previous goal, with the ball being kicked into the goal and the players celebrating. The video also shows a replay of the previous goal, with the ball being kicked into the goal and the players celebrating. The video also shows a replay of the previous goal, with the ball being kicked into", + "tokens_per_second": 13.491381583278928, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.341111660003662, + "cpu_usage": 43.2, + "cpu_core_utilization": [ + 40.0, + 25.3, + 79.9, + 27.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_021.mp4": { + "response": "The video captures a soccer match between two teams, with the audience in the stands cheering and the players engaged in the game. The video showcases various moments from the match, including a player scoring a goal, a player being tackled, and a player running with the ball. The video also features a player wearing a pink jersey, and the audience is seen cheering and waving. The video is a great representation of the excitement and energy of a soccer match.", + "tokens_per_second": 10.918759797926471, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 90, + "inference_time": 13.616441011428833, + "cpu_usage": 53.1, + "cpu_core_utilization": [ + 84.7, + 43.6, + 46.6, + 37.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_022.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular player. The player is seen dribbling the ball and making a goal, while the opposing team tries to block the shot. 
The video also shows other moments from the game, including a player scoring a goal and a player running with the ball. The video is shot in slow motion, highlighting the player's skill and the intensity of the game.", + "tokens_per_second": 10.866962165814611, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 85, + "inference_time": 12.839205980300903, + "cpu_usage": 54.1, + "cpu_core_utilization": [ + 52.7, + 52.2, + 63.9, + 47.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_023.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a player wearing a red and blue jersey. The player is seen dribbling the ball and making a pass, while the opposing team tries to defend the ball. The video also shows other players from the same team, who are seen running around the field and trying to block the pass. The video is shot in slow motion, highlighting the player's skill and the intensity of the game. The video is shot in a close-up view, allowing the viewer to see the player's movements and the ball's trajectory. Overall, the video showcases the skill and athleticism of the soccer player and the intensity of the game.", + "tokens_per_second": 12.056567900835331, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 136, + "inference_time": 16.067134380340576, + "cpu_usage": 48.6, + "cpu_core_utilization": [ + 50.8, + 76.4, + 33.9, + 33.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_024.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on the players' movements and the ball's trajectory. The players are seen running around the field, with one player wearing a green jersey and the other wearing a blue jersey. The video also shows a goal being scored by the green jersey player, and the ball is seen flying through the air. The video also shows a replay of the goal, with the ball still in the air. 
The video also shows a replay of the goal, with the ball still in the air. The video also shows a replay of the goal, with the ball still in the air. The video also shows a replay of the goal, with the ball still in the air. The video also shows a replay of the goal, with the ball still in the air. The video also shows a replay of the goal, with the ball still in the air. The video also shows a replay of the goal, with the ball still in the air. The video also shows a replay of the goal, with the ball still in the air. The video also shows a replay of the goal, with the ball still in the air. The video also shows a replay of the goal, with the ball still in the air. The video also shows", + "tokens_per_second": 13.381210503864766, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.630710124969482, + "cpu_usage": 43.7, + "cpu_core_utilization": [ + 27.6, + 45.4, + 28.2, + 73.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_025.mp4": { + "response": "The video captures a soccer match between Argentina and Brazil, with the Argentinian team wearing blue shirts and the Brazilian team wearing yellow shirts. The Argentinian team scores a goal, and the players celebrate with hugs and high fives. The video also shows other soccer matches between Argentina and Brazil, including a match between Argentina and Deportivo La Coruna. The video is a collection of soccer footage from different matches, with the Argentinian team wearing blue shirts and the Brazilian team wearing yellow shirts.", + "tokens_per_second": 11.34225179666489, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 102, + "inference_time": 14.623593807220459, + "cpu_usage": 54.3, + "cpu_core_utilization": [ + 39.2, + 45.2, + 91.6, + 41.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_026.mp4": { + "response": "The video captures a soccer match between two teams, with the yellow team scoring a goal. 
The video shows the players in action, with the yellow team scoring multiple goals and the green team defending. The video also shows the players running around the field, with the yellow team scoring multiple goals and the green team defending. The video ends with the yellow team scoring a goal.", + "tokens_per_second": 10.428299716256006, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 74, + "inference_time": 12.605700492858887, + "cpu_usage": 57.1, + "cpu_core_utilization": [ + 71.9, + 46.6, + 61.8, + 48.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_027.mp4": { + "response": "The video captures a soccer match between two teams, with the goal being scored by one of the players. The players are seen running around the field, with one player wearing a blue jersey and the other wearing a yellow jersey. The video also shows a replay of the goal, with the players running around the field and celebrating. The video also shows a replay of the goal, with the players running around the field and celebrating.", + "tokens_per_second": 10.689614725622825, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 85, + "inference_time": 13.572460889816284, + "cpu_usage": 56.5, + "cpu_core_utilization": [ + 41.5, + 47.4, + 49.0, + 88.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_028.mp4": { + "response": "The video captures a soccer match between two teams, with the blue team wearing blue shirts and the white team wearing white shirts. The blue team scores a goal, and the white team celebrates. The video also shows other soccer matches, including a Copa America match between two teams, and a Copa America match between two teams. The video also shows a soccer match between two teams, with the white team wearing white shirts and the blue team wearing blue shirts. The video also shows a soccer match between two teams, with the white team wearing white shirts and the blue team wearing blue shirts. 
The video also shows a soccer match between two teams, with the white team wearing white shirts and the blue team wearing blue shirts. The video also shows a soccer match between two teams, with the white team wearing white shirts and the blue team wearing blue shirts. The video also shows a soccer match between two teams, with the white team wearing white shirts and the blue team wearing blue shirts. The video also shows a soccer match between two teams, with the white team wearing white shirts and the blue team wearing blue shirts. The video also shows a soccer match between two teams, with the white team wearing white shirts and the blue team wearing blue shirts. The video also shows a soccer match between two teams", + "tokens_per_second": 13.483402737231337, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.664146184921265, + "cpu_usage": 43.9, + "cpu_core_utilization": [ + 63.8, + 27.2, + 57.7, + 27.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_029.mp4": { + "response": "The video captures a soccer match between Argentina and Venezuela. The players are seen running around the field, with one player wearing a yellow jersey and black shorts. The video also shows a close-up of a soccer ball, and the players are seen running towards the goal. The video also shows a shot of a soccer player with a tattoo on their arm.", + "tokens_per_second": 10.14359586210809, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 70, + "inference_time": 12.203787803649902, + "cpu_usage": 57.1, + "cpu_core_utilization": [ + 64.5, + 58.1, + 44.6, + 61.3 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_030.mp4": { + "response": "The video captures a soccer match between Argentina and Nigeria. The players are seen running around the field, with one player wearing a yellow jersey scoring a goal. 
The video also features a shot of a soccer player celebrating, and a shot of a soccer player running with the ball. The video also shows a shot of a soccer player with a yellow jersey, and a shot of a soccer player with a yellow jersey running with the ball.", + "tokens_per_second": 10.746648641485438, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 86, + "inference_time": 14.553328275680542, + "cpu_usage": 58.5, + "cpu_core_utilization": [ + 52.2, + 82.8, + 47.5, + 51.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_031.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular player. The player is seen running towards the goal, and the camera pans around to show other players on the field. The video also features a replay of the goal, with the player celebrating and the crowd watching. The video is shot in slow motion, highlighting the player's skill and the excitement of the game.", + "tokens_per_second": 10.552761688015028, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 79, + "inference_time": 13.729512214660645, + "cpu_usage": 57.5, + "cpu_core_utilization": [ + 85.1, + 49.1, + 51.1, + 44.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_032.mp4": { + "response": "The video captures a soccer match between two teams, with the players running around the field and the ball being kicked into the goal. The players are wearing different colored jerseys, and the ball is being kicked by a player wearing a yellow jersey. The video also shows a close-up of the ball being kicked into the goal, and the players are seen celebrating the goal. The video is shot in slow motion, and the players are seen running around the field, with the ball being kicked by a player wearing a yellow jersey. The video is shot in slow motion, and the players are seen running around the field, with the ball being kicked by a player wearing a yellow jersey. 
The video is shot in slow motion, and the players are seen running around the field, with the ball being kicked by a player wearing a yellow jersey. The video is shot in slow motion, and the players are seen running around the field, with the ball being kicked by a player wearing a yellow jersey. The video is shot in slow motion, and the players are seen running around the field, with the ball being kicked by a player wearing a yellow jersey. The video is shot in slow motion, and the players are seen running around the field, with the ball being kicked by a player wearing a", + "tokens_per_second": 13.374680716381132, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.956707239151, + "cpu_usage": 44.3, + "cpu_core_utilization": [ + 25.3, + 23.5, + 32.3, + 96.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_033.mp4": { + "response": "The video captures a soccer match between Argentina and Iran. The players are seen running around the field, with one player wearing a red jersey and the other wearing a blue jersey. The video also shows a player from Argentina celebrating with his teammates.", + "tokens_per_second": 8.750997375438747, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 48, + "inference_time": 10.455338716506958, + "cpu_usage": 59.6, + "cpu_core_utilization": [ + 47.0, + 86.9, + 54.7, + 49.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_034.mp4": { + "response": "The video captures a soccer match between two teams, with the players wearing white and red uniforms. The players are seen running around the field, passing the ball to each other, and trying to score goals. The video also shows a close-up of a player scoring a goal, and the players celebrating their victory. The video is shot in slow motion, highlighting the action and movement of the players. The video is shot in a stadium, with the audience visible in the background. 
The video is a great representation of the excitement and energy of a soccer match.", + "tokens_per_second": 11.600796413969313, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 111, + "inference_time": 14.606523036956787, + "cpu_usage": 50.2, + "cpu_core_utilization": [ + 37.3, + 59.9, + 38.3, + 65.4 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_035.mp4": { + "response": "The video captures a soccer match between two teams, with the players running around the field and the ball being passed to each other. The players are wearing blue and white uniforms, and the audience is cheering them on. The video also shows a replay of the same scene, with the players running around the field and the ball being passed to each other. The video also shows a replay of the same scene, with the players running around the field and the ball being passed to each other. The video also shows a replay of the same scene, with the players running around the field and the ball being passed to each other. The video also shows a replay of the same scene, with the players running around the field and the ball being passed to each other. The video also shows a replay of the same scene, with the players running around the field and the ball being passed to each other. The video also shows a replay of the same scene, with the players running around the field and the ball being passed to each other. The video also shows a replay of the same scene, with the players running around the field and the ball being passed to each other. 
The video also shows a replay of the same scene, with the players running around the field and the ball being passed to", + "tokens_per_second": 13.475444822957598, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 25.259315729141235, + "cpu_usage": 45.0, + "cpu_core_utilization": [ + 57.4, + 30.4, + 64.5, + 27.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_036.mp4": { + "response": "The video captures a soccer match between two teams, with the blue team scoring a goal. The players are seen running around the field, with the blue team celebrating their goal. The video also shows a replay of the goal, with the blue team celebrating again. The video is shot in slow motion, highlighting the excitement and energy of the game.", + "tokens_per_second": 10.181369513872784, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 69, + "inference_time": 11.76690411567688, + "cpu_usage": 56.8, + "cpu_core_utilization": [ + 41.9, + 41.8, + 92.1, + 50.9 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_037.mp4": { + "response": "The video captures a soccer match between two teams, with the players running around the field and the ball being kicked into the goal. The players are wearing different colored uniforms, and the ball is being kicked by a player in a blue uniform. The video also shows a close-up of the ball being kicked into the goal, and the players are seen celebrating the goal. The video also shows a shot of a soccer player in a yellow uniform, and a shot of a soccer player in a blue uniform. The video also shows a shot of a soccer player in a green uniform, and a shot of a soccer player in a white uniform. The video also shows a shot of a soccer player in a red uniform, and a shot of a soccer player in a black uniform. The video also shows a shot of a soccer player in a blue uniform, and a shot of a soccer player in a white uniform. 
The video also shows a shot of a soccer player in a red uniform, and a shot of a soccer player in a black uniform. The video also shows a shot of a soccer player in a blue uniform, and a shot of a soccer player in a white uniform. The video also shows a shot of a soccer player in a red uniform, and a shot of a soccer", + "tokens_per_second": 13.31561753455536, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.409454345703125, + "cpu_usage": 42.8, + "cpu_core_utilization": [ + 24.5, + 26.3, + 27.6, + 92.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_038.mp4": { + "response": "The video captures a soccer match between two teams, with the players wearing different colored uniforms. The players are seen running around the field, passing the ball, and making goals. The video also shows a replay of a goal scored by one of the players. The video is shot in slow motion, highlighting the action and movement of the players. The video is shot in a close-up view, focusing on the players and their movements. The video is shot in a fast motion, highlighting the action and movement of the players. The video is shot in a slow motion, highlighting the action and movement of the players. The video is shot in a fast motion, highlighting the action and movement of the players. The video is shot in a slow motion, highlighting the action and movement of the players. The video is shot in a fast motion, highlighting the action and movement of the players. The video is shot in a slow motion, highlighting the action and movement of the players. The video is shot in a fast motion, highlighting the action and movement of the players. The video is shot in a slow motion, highlighting the action and movement of the players. The video is shot in a fast motion, highlighting the action and movement of the players. 
The video is shot in a slow motion", + "tokens_per_second": 13.391948929733484, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 23.618805408477783, + "cpu_usage": 41.4, + "cpu_core_utilization": [ + 26.0, + 21.7, + 23.0, + 95.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_039.mp4": { + "response": "The video shows a soccer match between two teams, with the ball being kicked into the goal by one of the players. The video also shows the same match again in slow motion. The video is a replay of a soccer match between two teams, with the ball being kicked into the goal by one of the players. The video is a replay of a soccer match between two teams, with the ball being kicked into the goal by one of the players. The video is a replay of a soccer match between two teams, with the ball being kicked into the goal by one of the players. The video is a replay of a soccer match between two teams, with the ball being kicked into the goal by one of the players. The video is a replay of a soccer match between two teams, with the ball being kicked into the goal by one of the players. The video is a replay of a soccer match between two teams, with the ball being kicked into the goal by one of the players. The video is a replay of a soccer match between two teams, with the ball being kicked into the goal by one of the players. The video is a replay of a soccer match between two teams, with the ball being kicked into the goal by one of the players. 
The video is a", + "tokens_per_second": 13.257980068291804, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.98432445526123, + "cpu_usage": 44.0, + "cpu_core_utilization": [ + 30.0, + 85.9, + 27.0, + 33.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_040.mp4": { + "response": "The video captures a soccer match between two teams, with the blue team wearing blue shirts and the white team wearing white shirts. The blue team scores a goal, and the players celebrate. The video also shows a replay of the goal, highlighting the player who scored it. The video also shows the players running around the field, trying to score more goals. The video is shot from an aerial perspective, providing a bird's eye view of the game. The video is shot in 4k resolution, providing a high-quality viewing experience. The video is shot in 1080p resolution, providing a clear and detailed view of the game. The video is shot in 1080p resolution, providing a clear and detailed view of the game. The video is shot in 1080p resolution, providing a clear and detailed view of the game. The video is shot in 1080p resolution, providing a clear and detailed view of the game. The video is shot in 1080p resolution, providing a clear and detailed view of the game. The video is shot in 1080p resolution, providing a clear and detailed view of the game. The video is shot in 1080p", + "tokens_per_second": 13.47823802494779, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 25.217121839523315, + "cpu_usage": 45.0, + "cpu_core_utilization": [ + 59.7, + 42.4, + 48.5, + 29.5 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_041.mp4": { + "response": "The video captures a soccer match between two teams, with the audience cheering and the players running around the field. 
The video features several shots of the players, including a player wearing a yellow jersey and a player wearing a blue jersey. The video also shows the audience cheering and the players running around the field.", + "tokens_per_second": 9.739024302972952, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 61, + "inference_time": 10.97818636894226, + "cpu_usage": 57.1, + "cpu_core_utilization": [ + 41.5, + 45.6, + 92.8, + 48.6 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_042.mp4": { + "response": "The video showcases a soccer match between two teams, with a focus on the goal scoring. The players are seen running around the field, with one player scoring a goal and the other player falling down. The video also features a replay of the goal, with the same player scoring again. The video also shows a replay of a different goal scored by the same player, with the same player falling down again. The video also features a replay of a different goal scored by the same player, with the same player falling down again. The video also features a replay of a different goal scored by the same player, with the same player falling down again. The video also features a replay of a different goal scored by the same player, with the same player falling down again. The video also features a replay of a different goal scored by the same player, with the same player falling down again. The video also features a replay of a different goal scored by the same player, with the same player falling down again. The video also features a replay of a different goal scored by the same player, with the same player falling down again. The video also features a replay of a different goal scored by the same player, with the same player falling down again. 
The video also features a replay of", + "tokens_per_second": 13.462089064329856, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 256, + "inference_time": 24.583365201950073, + "cpu_usage": 43.8, + "cpu_core_utilization": [ + 36.6, + 25.6, + 82.1, + 30.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_043.mp4": { + "response": "The video shows a soccer match between Barcelona and Liverpool. The match is being played on a field with a crowd of spectators. The players are wearing blue and white shirts, and the ball is in the air. The video also shows other soccer matches from different years, including a match between Barcelona and Athletic Club in 2013.", + "tokens_per_second": 10.093422721507519, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 68, + "inference_time": 13.122522830963135, + "cpu_usage": 59.8, + "cpu_core_utilization": [ + 88.6, + 48.6, + 49.1, + 52.8 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_044.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a particular player. The player is seen dribbling the ball and making a goal, while the opposing team tries to block the shot. The video also shows other moments from the game, including a player falling to the ground and the crowd cheering. The video is shot in slow motion, highlighting the player's skill and the intensity of the match.", + "tokens_per_second": 10.718624704506539, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 83, + "inference_time": 14.035686731338501, + "cpu_usage": 57.9, + "cpu_core_utilization": [ + 49.1, + 82.8, + 46.4, + 53.1 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_045.mp4": { + "response": "The video captures a soccer match between two teams, with the focus on a player wearing a green and yellow uniform. The player is seen dribbling the ball and making a shot on goal, which is blocked by the goalkeeper. 
The video also shows other players from both teams, with one player wearing a white and red uniform. The video is shot from a high angle, providing a bird's eye view of the game. The video ends with the words \"Wouva\" written on the screen.", + "tokens_per_second": 11.17376621816953, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 100, + "inference_time": 13.780639886856079, + "cpu_usage": 51.4, + "cpu_core_utilization": [ + 35.8, + 78.6, + 39.3, + 52.0 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_046.mp4": { + "response": "The video captures a soccer match between two teams, with the audience in the background. The players are seen running around the field, and the ball is seen flying through the air. The video ends with a message of appreciation for the viewers.", + "tokens_per_second": 8.889578335077823, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 48, + "inference_time": 10.128128290176392, + "cpu_usage": 57.5, + "cpu_core_utilization": [ + 86.8, + 47.6, + 52.0, + 43.7 + ] + }, + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_60s/messi_part_047.mp4": { + "response": "The video features a black and white logo with a play button, which is likely the logo for the channel \"Wouva.\" The logo is displayed on a black background, and there are also other logos for social media platforms such as YouTube, Instagram, and Twitter. 
The video appears to be a promotional or introductory clip for the channel.", + "tokens_per_second": 10.1511797285819, + "peak_gpu_memory_mb": 4498.375, + "num_generated_tokens": 68, + "inference_time": 9.243825912475586, + "cpu_usage": 46.0, + "cpu_core_utilization": [ + 36.6, + 28.2, + 86.3, + 32.9 + ] + } +} \ No newline at end of file diff --git a/API_Transformers/outputs/qwen_30s.json b/API_Transformers/outputs/qwen_30s.json new file mode 100644 index 0000000000000000000000000000000000000000..8223de3f2f5ac3f0bddd401d90ab4d2a6889f489 --- /dev/null +++ b/API_Transformers/outputs/qwen_30s.json @@ -0,0 +1,16 @@ +{ + "/mnt/data/xiuying/Code/local_deploy/messi/Clips_30s/messi_part_001.mp4": { + "response": "The video shows a soccer match between FC Barcelona and Malaga. The match took place on January 23, 2016, and was part of La Liga. The video appears to be a freeze frame from a soccer game, with the scoreboard visible in the background. The scoreboard shows that FC Barcelona is currently leading with a score of 1-0. 
The video is presented by WOUVA.", + "tokens_per_second": 4.974079275567591, + "peak_gpu_memory_mb": 9376.375, + "num_generated_tokens": 84, + "inference_time": 21.924885272979736, + "cpu_usage": 60.2, + "cpu_core_utilization": [ + 60.4, + 56.6, + 56.5, + 67.3 + ] + } +} \ No newline at end of file diff --git a/API_Transformers/scripts/run_gemma.sh b/API_Transformers/scripts/run_gemma.sh new file mode 100644 index 0000000000000000000000000000000000000000..415adffc5b9a484ec8ea3585d467422733096dcf --- /dev/null +++ b/API_Transformers/scripts/run_gemma.sh @@ -0,0 +1,2 @@ +python infer.py \ +--model_path google/gemma-3-4b-it \ \ No newline at end of file diff --git a/API_Transformers/scripts/run_lfm.sh b/API_Transformers/scripts/run_lfm.sh new file mode 100644 index 0000000000000000000000000000000000000000..9d0cc9a7e8aebc6a70098d7dcd96eaeea618fa35 --- /dev/null +++ b/API_Transformers/scripts/run_lfm.sh @@ -0,0 +1,2 @@ +python infer.py \ +--model_path LiquidAI/LFM2-VL-1.6B \ \ No newline at end of file diff --git a/API_Transformers/scripts/run_minicpm.sh b/API_Transformers/scripts/run_minicpm.sh new file mode 100644 index 0000000000000000000000000000000000000000..ec2ad969db0825d8169e8fec5c311e3f587f6465 --- /dev/null +++ b/API_Transformers/scripts/run_minicpm.sh @@ -0,0 +1,2 @@ +python infer.py \ +--model_path openbmb/MiniCPM-V-4 \ \ No newline at end of file diff --git a/API_Transformers/scripts/run_qwen.sh b/API_Transformers/scripts/run_qwen.sh new file mode 100644 index 0000000000000000000000000000000000000000..fc75312b0c942ba7f80112f9911fec80ea7c40bd --- /dev/null +++ b/API_Transformers/scripts/run_qwen.sh @@ -0,0 +1,2 @@ +python infer.py \ +--model_path Qwen/Qwen2-VL-2B-Instruct-AWQ \ \ No newline at end of file diff --git a/API_Transformers/scripts/run_qwen2_5.sh b/API_Transformers/scripts/run_qwen2_5.sh new file mode 100644 index 0000000000000000000000000000000000000000..208c39c1fd7a9a3aeef199fd5cd9b092f24027df --- /dev/null +++ 
b/API_Transformers/scripts/run_qwen2_5.sh @@ -0,0 +1,2 @@ +python infer.py \ +--model_path Qwen/Qwen2.5-VL-3B-Instruct-AWQ \ diff --git a/API_Transformers/split.py b/API_Transformers/split.py new file mode 100644 index 0000000000000000000000000000000000000000..89e510c92825c476eb0cecdb11b3db4818a25f9d --- /dev/null +++ b/API_Transformers/split.py @@ -0,0 +1,91 @@ +import os +from concurrent.futures import ThreadPoolExecutor +import math +import cv2 +from moviepy.editor import VideoFileClip +import tqdm + +def split_video_segment(input_video_path, output_dir, start_time, end_time, output_filename): + """使用 moviepy 提取并保存一个视频片段。""" + output_path = os.path.join(output_dir, output_filename) + try: + # 使用 with 语句可以确保资源被正确释放 + with VideoFileClip(input_video_path) as video: + # 提取子片段 + subclip = video.subclip(start_time, end_time) + # 仅写视频不写音频,并使用 ultrafast 预设以提升速度 + subclip.write_videofile( + output_path, + codec='libx264', + audio=False, + preset='ultrafast', + threads=os.cpu_count() or 4, + logger=None, + ) + print(f"成功创建: {output_filename}") + except Exception as e: + # 这里我们先打印错误信息 + print(f"分割片段 {output_filename} 时出错: {e}") + print("已设置为仅视频输出(audio=False)。请检查输入/输出格式兼容性或更换编码器。") + + +def main(): + # --- 请在这里配置 --- + input_video = "/mnt/data/xiuying/Code/local_deploy/video/video.mp4" # <--- 请修改为你的视频文件路径 + output_dir = "/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s" # <--- 请修改为输出目录名 + split_seconds = 60 # <--- 请修改为每个片段的秒数 (n) + # -------------------- + + # 使用CPU核心数作为最大线程数,可以根据需要调整 + max_threads = os.cpu_count() or 4 + + # 检查输入视频是否存在 + if not os.path.exists(input_video): + print(f"错误:输入视频文件不存在 -> {input_video}") + return + + # 创建输出目录 + if not os.path.exists(output_dir): + os.makedirs(output_dir) + print(f"创建输出目录: {output_dir}") + + total_duration = 0 + try: + # 获取视频总时长 + with VideoFileClip(input_video) as video: + total_duration = video.duration + except Exception as e: + print(f"使用 moviepy 读取视频文件时出错: {e}") + print("请确保 ffmpeg 已正确安装并可以被 moviepy 调用。") + return + + 
print(f"视频总时长: {total_duration:.2f} 秒") + + # 创建分割任务列表 + tasks = [] + num_clips = math.ceil(total_duration / split_seconds) + base_filename, file_extension = os.path.splitext(os.path.basename(input_video)) + + for i in range(num_clips): + start_time = i * split_seconds + # 确保结束时间不会超过视频总长 + end_time = min(start_time + split_seconds, total_duration) + + # 如果计算出的开始时间已经等于或超过总时长,则停止 + if start_time >= total_duration: + break + + output_filename = f"{base_filename}_part_{i+1:03d}{file_extension}" + tasks.append((input_video, output_dir, start_time, end_time, output_filename)) + + print(f"准备将视频分割成 {len(tasks)} 个片段...") + + # 使用线程池执行分割任务 + with ThreadPoolExecutor(max_workers=max_threads) as executor: + executor.map(lambda p: split_video_segment(*p), tasks) + + print("\n所有视频片段处理完成!") + + +if __name__ == "__main__": + main() diff --git a/API_Transformers/test.py b/API_Transformers/test.py new file mode 100644 index 0000000000000000000000000000000000000000..84be0c25badf714ae49fdcca4893a7fa9973e9f0 --- /dev/null +++ b/API_Transformers/test.py @@ -0,0 +1,54 @@ +import os +import subprocess +import sys +import argparse +import json +import time +from tqdm import tqdm +output_dir = "/mnt/data/xiuying/Code/local_deploy/output_0821" + +parser = argparse.ArgumentParser() +parser.add_argument("--model", type=str, default="LFM") +args = parser.parse_args() +output_dir = os.path.join(output_dir, args.model) +os.makedirs(output_dir, exist_ok=True) +VIDEO_FILE_DIR = "/mnt/data/xiuying/Code/local_deploy/video/new/Clips_60s" + +# API服务器的URL +API_URL = "http://127.0.0.1:8010/video-inference/" + + +PROMPT = "Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves." 
+ +files = os.listdir(VIDEO_FILE_DIR) +files.sort() +total_output = {} +cur_time = time.strftime("%Y%m%d_%H%M%S", time.localtime()) +output_file_path = os.path.join(output_dir, cur_time, f"{VIDEO_FILE_DIR.split('/')[-1]}.json") +os.makedirs(os.path.join(output_dir, cur_time), exist_ok=True) +for file in tqdm(files): + video_file_path = os.path.join(VIDEO_FILE_DIR, file) + start_time = time.time() + command = ( + f"curl -v -X POST '{API_URL}' " + f"-F \"prompt={PROMPT}\" " + f"-F 'video_file={video_file_path}' " + f"-F 'sampling_method=uniform' " + f"-F 'sampling_rate=30' " + ) + + print("将要执行以下 cURL 命令:") + print("---------------------------------") + print(command) + print("---------------------------------") + print("\n正在执行...\n") + + return_result = subprocess.check_output(command, shell=True) + response = json.loads(return_result) + total_output[file] = response + end_time = time.time() + total_output[file]["request_time"] = end_time - start_time + with open(output_file_path, "w") as f: + json.dump(total_output, f, indent=4) + +print("\n\n✅ 测试脚本执行完毕。") diff --git a/API_Transformers/video_processor.py b/API_Transformers/video_processor.py new file mode 100644 index 0000000000000000000000000000000000000000..cba27cea181d9264edbfcfa7020f896d2c097c0e --- /dev/null +++ b/API_Transformers/video_processor.py @@ -0,0 +1,124 @@ +import cv2 +import numpy as np +import base64 +from typing import List +from enum import Enum +from skimage.metrics import structural_similarity as ssim + +class FrameSamplingMethod(str, Enum): + UNIFORM = "uniform" + CONTENT_AWARE = "content_aware" + +def extract_frames( + video_path: str, + method: FrameSamplingMethod, + sampling_rate: int +) -> List[np.ndarray]: + """ + 从视频中提取帧。 + 对于UNIFORM方法,sampling_rate表示要提取的总帧数。 + 对于CONTENT_AWARE方法,sampling_rate现在也表示要提取的总帧数,但会选择变化最大的帧。 + """ + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + raise IOError(f"Cannot open video file: {video_path}") + + frames = [] + total_frames = 
int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + target_size = (420, 280) # (width, height) + + if method == FrameSamplingMethod.UNIFORM: + if sampling_rate <= 0: + cap.release() + return [] + + # 如果请求的帧数大于总帧数,则返回所有帧 + if sampling_rate >= total_frames: + while True: + ret, frame = cap.read() + if not ret: + break + resized_frame = cv2.resize(frame, target_size) + frames.append(resized_frame) + cap.release() + return frames + + # 计算采样间隔 + step = total_frames / sampling_rate + for i in range(sampling_rate): + frame_index = int(i * step) + cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index) + ret, frame = cap.read() + if ret: + resized_frame = cv2.resize(frame, target_size) + frames.append(resized_frame) + + elif method == FrameSamplingMethod.CONTENT_AWARE: + if sampling_rate <= 0: + cap.release() + return [] + + # 如果视频总帧数少于或等于请求的帧数,则返回所有帧 + if total_frames <= sampling_rate: + while True: + ret, frame = cap.read() + if not ret: + break + resized_frame = cv2.resize(frame, target_size) + frames.append(resized_frame) + cap.release() + return frames + + # --- Pass 1: 计算所有相邻帧的SSIM分数 --- + ssim_scores = [] + cap.set(cv2.CAP_PROP_POS_FRAMES, 0) + ret, prev_frame = cap.read() + if not ret: + cap.release() + return [] + + prev_frame_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY) + + for i in range(1, total_frames): + ret, current_frame = cap.read() + if not ret: + break + + current_frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY) + score, _ = ssim(prev_frame_gray, current_frame_gray, full=True) + ssim_scores.append((score, i)) # 存储(ssim_score, frame_index) + prev_frame_gray = current_frame_gray + + # --- 选择变化最大的 n-1 帧 --- + # 按SSIM分数升序排序 (分数越低,差异越大) + ssim_scores.sort(key=lambda x: x[0]) + + # 选择分数最低的 n-1 帧的索引 + selected_indices = {score[1] for score in ssim_scores[:sampling_rate - 1]} + # 始终包括第一帧 (index 0) + selected_indices.add(0) + + # --- Pass 2: 根据索引提取帧 --- + sorted_indices = sorted(list(selected_indices)) + for idx in sorted_indices: + cap.set(cv2.CAP_PROP_POS_FRAMES, 
idx) + ret, frame = cap.read() + if ret: + resized_frame = cv2.resize(frame, target_size) + frames.append(resized_frame) + + cap.release() + return frames + +def encode_frames_to_base64(frames: List[np.ndarray]) -> List[str]: + """ + 将OpenCV帧列表编码为base64字符串列表。 + """ + base64_frames = [] + for frame in frames: + # 将帧编码为JPEG格式 + _, buffer = cv2.imencode('.jpg', frame) + # 将缓冲区字节转换为base64字符串 + base64_str = base64.b64encode(buffer).decode('utf-8') + base64_frames.append(base64_str) + return base64_frames \ No newline at end of file diff --git a/Direct_Transformers/__pycache__/video_processor.cpython-311.pyc b/Direct_Transformers/__pycache__/video_processor.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7d400ec73b24f5d09b157f573a00e8e748f012d8 Binary files /dev/null and b/Direct_Transformers/__pycache__/video_processor.cpython-311.pyc differ diff --git a/Direct_Transformers/cal.py b/Direct_Transformers/cal.py new file mode 100644 index 0000000000000000000000000000000000000000..957ae3f5d551c5a1a389fb9339de5e1a0baf0691 --- /dev/null +++ b/Direct_Transformers/cal.py @@ -0,0 +1,18 @@ +import json + +metric = { + "tokens_per_second": [], + "peak_gpu_memory_mb": [], + "num_generated_tokens": [], + "inference_time": [], + "cpu_usage": [], +} +for key, value in json.load(open("/mnt/data/xiuying/Code/test/outputs/MiniCPM-V-4-int4/20250822_110321.json")).items(): + metric["tokens_per_second"].append(value["tokens_per_second"]) + metric["peak_gpu_memory_mb"].append(value["peak_gpu_memory_mb"]) + metric["num_generated_tokens"].append(value["num_generated_tokens"]) + metric["inference_time"].append(value["inference_time"]) + metric["cpu_usage"].append(value["cpu_usage"]) + +for key, value in metric.items(): + print(key, sum(value) / len(value)) \ No newline at end of file diff --git a/Direct_Transformers/infer.py b/Direct_Transformers/infer.py new file mode 100644 index 
0000000000000000000000000000000000000000..d452693f680871dca89eb9bc2058820c0376a897 --- /dev/null +++ b/Direct_Transformers/infer.py @@ -0,0 +1,166 @@ +import os +import uuid +import time +import psutil +import torch +import cv2 +import shutil +from models.qwen import Qwen2VL +from models.gemma import Gemma +from models.minicpm import MiniCPM +from models.lfm import LFM2 +from video_processor import extract_frames, FrameSamplingMethod +import argparse +import json +import logging +from tqdm import tqdm +TEMP_VIDEO_DIR = "temp_videos" +def process_video(model, video_path, prompt, sampling_method_str="CONTENT_AWARE", sampling_rate=5): + """ + 直接处理视频和文本提示,进行推理并返回结果。 + + Args: + video_path (str): 视频文件路径 + prompt (str): 文本提示 + sampling_method_str (str): 采样方法字符串 + sampling_rate (int): 采样率或阈值 + + Returns: + dict: 推理结果 + """ + request_start_time = time.time() + request_id = str(uuid.uuid4()) + logging.info(f"[{request_id}] Processing video: '{video_path}', Prompt: '{prompt}'") + + # 验证视频文件 + if not os.path.exists(video_path): + raise FileNotFoundError(f"Video file not found: {video_path}") + + if not video_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')): + logging.warning(f"[{request_id}] File '{video_path}' may not be a video file.") + + # 转换采样方法字符串为枚举 + sampling_method_map = { + "CONTENT_AWARE": FrameSamplingMethod.CONTENT_AWARE, + "UNIFORM": FrameSamplingMethod.UNIFORM, + } + sampling_method = sampling_method_map.get(sampling_method_str, FrameSamplingMethod.CONTENT_AWARE) + + # 创建临时目录 + temp_frame_dir = os.path.join(TEMP_VIDEO_DIR, request_id) + os.makedirs(temp_frame_dir, exist_ok=True) + + try: + logging.info(f"[{request_id}] Extracting frames using method: {sampling_method.value}, rate/threshold: {sampling_rate}") + + frames = extract_frames(video_path, sampling_method, sampling_rate) + if not frames: + raise ValueError(f"Could not extract any frames from the video: {video_path}") + + logging.info(f"[{request_id}] Extracted {len(frames)} frames successfully. 
Saving to temporary files...") + + # 将帧保存到临时文件并获取其路径 + frame_paths = [] + for i, frame in enumerate(frames): + frame_path = os.path.join(temp_frame_dir, f"frame_{i:04d}.jpg") + cv2.imwrite(frame_path, frame) + abs_frame_path = os.path.abspath(frame_path) + frame_paths.append(abs_frame_path) + + logging.info(f"[{request_id}] {len(frame_paths)} frames saved to {temp_frame_dir}") + + # 进行推理 + output = model.generate(frame_paths, prompt) + + logging.info(f"Tokens per second: {output['tokens_per_second']}, Peak GPU memory MB: {output['peak_gpu_memory_mb']}") + + inference_end_time = time.time() + cpu_usage = psutil.cpu_percent(interval=None) + cpu_core_utilization = psutil.cpu_percent(interval=None, percpu=True) + logging.info(f"[{request_id}] Inference time: {inference_end_time - request_start_time:.2f} seconds, CPU usage: {cpu_usage}%, CPU core utilization: {cpu_core_utilization}") + + # 添加性能指标到输出 + output["inference_time"] = inference_end_time - request_start_time + output["cpu_usage"] = cpu_usage + output["cpu_core_utilization"] = cpu_core_utilization + output["num_generated_tokens"] = output["num_generated_tokens"] + output["request_id"] = request_id + + return output + + except Exception as e: + logging.error(f"[{request_id}] An error occurred during processing: {str(e)}", exc_info=True) + raise e + finally: + # 清理临时文件 + if os.path.exists(temp_frame_dir): + shutil.rmtree(temp_frame_dir) + logging.info(f"[{request_id}] Cleaned up temporary frame directory: {temp_frame_dir}") + + +def main(): + """主函数""" + try: + parser = argparse.ArgumentParser() + parser.add_argument("--model_path", type=str, default="Qwen/Qwen2.5-VL-3B-Instruct-AWQ") + parser.add_argument("--video_dir", type=str, default="videos", help="视频") + parser.add_argument("--prompt", type=str, default="Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. 
Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.", help="文本提示") + parser.add_argument("--sampling_method", type=str, default="UNIFORM", + choices=["CONTENT_AWARE", "UNIFORM", "RANDOM"], + help="帧采样方法") + parser.add_argument("--sampling_rate", type=int, default=30, help="采样率或阈值") + args = parser.parse_args() + + + # --- 日志和临时文件目录配置 --- + LOG_DIR = f"logs/{args.model_path.split('/')[-1]}" + OUTPUT_DIR = f"outputs/{args.model_path.split('/')[-1]}" + os.makedirs(LOG_DIR, exist_ok=True) + os.makedirs(OUTPUT_DIR, exist_ok=True) + os.makedirs(TEMP_VIDEO_DIR, exist_ok=True) + start_time = time.strftime('%Y%m%d_%H%M%S') + log_filename = f"{LOG_DIR}/{start_time}.log" + logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S', filename=log_filename, filemode='a') + + # --- 加载模型和处理器 --- + logging.info(f"Loading model: {args.model_path}") + model_load_start = time.time() + if "qwen" in args.model_path.lower(): + model = Qwen2VL(args.model_path) + elif "gemma" in args.model_path.lower(): + model = Gemma(args.model_path) + elif "minicpm" in args.model_path.lower(): + model = MiniCPM(args.model_path) + elif "lfm" in args.model_path.lower(): + model = LFM2(args.model_path) + model_load_end = time.time() + GPU_MEMORY_USAGE = f"{torch.cuda.memory_allocated(0)/1024**2:.2f} MB" if torch.cuda.is_available() else "N/A" + logging.info(f"Model loaded in {model_load_end - model_load_start:.2f} seconds") + logging.info(f"GPU Memory Usage after model load: {GPU_MEMORY_USAGE}") + # 处理视频 + total_output = {} + for video_path in tqdm(os.listdir(args.video_dir)): + result = process_video( + model=model, + video_path=os.path.join(args.video_dir, video_path), + prompt=args.prompt, + sampling_method_str=args.sampling_method, + sampling_rate=args.sampling_rate + ) + total_output[video_path] = result + # 保存结果到文件 + 
output_filename = f"{OUTPUT_DIR}/{start_time}.json" + with open(output_filename, 'w', encoding='utf-8') as f: + json.dump(total_output, f, ensure_ascii=False, indent=2) + + print(f"处理完成!结果已保存到: {output_filename}") + print(f"推理时间: {result['inference_time']:.2f} 秒") + print(f"生成的内容: {result.get('generated_text', 'N/A')}") + + except Exception as e: + logging.error(f"处理失败: {str(e)}", exc_info=True) + print(f"处理失败: {str(e)}") + + +if __name__ == "__main__": + main() diff --git a/Direct_Transformers/load.py b/Direct_Transformers/load.py new file mode 100644 index 0000000000000000000000000000000000000000..bcffd9649d4f887653de1642f1a4ebd4db74485d --- /dev/null +++ b/Direct_Transformers/load.py @@ -0,0 +1,18 @@ +from transformers import AutoModel, AutoTokenizer +import torch +import time + +model_id = "openbmb/MiniCPM-V-4-int4" + +model = AutoModel.from_pretrained( + model_id, + trust_remote_code=True, + attn_implementation='sdpa', + torch_dtype=torch.bfloat16 +) +model = model.eval().cuda() +tokenizer = AutoTokenizer.from_pretrained( + model_id, trust_remote_code=True +) + +time.sleep(1000000) \ No newline at end of file diff --git a/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_041834.log b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_041834.log new file mode 100644 index 0000000000000000000000000000000000000000..b6466973cd13cd1249cb31b59f6196b2d28dad3c --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_041834.log @@ -0,0 +1,41 @@ +2025-08-22 04:18:34 - INFO - Loading model: openbmb/MiniCPM-V-4-AWQ +2025-08-22 04:18:37 - INFO - vision_config is None, using default vision config +2025-08-22 04:19:29 - ERROR - 处理失败: +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 132, in main + model = MiniCPM(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/minicpm.py", line 13, in __init__ + self.model = AutoModel.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File 
"/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 593, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 317, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5023, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + [Previous line repeated 2 more times] + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 174, in replace_with_awq_linear + model._modules[name] = target_cls( + ^^^^^^^^^^^ + File 
"/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/modules/linear/gemm.py", line 132, in __init__ + assert self.in_features % self.group_size == 0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +AssertionError diff --git a/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_042235.log b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_042235.log new file mode 100644 index 0000000000000000000000000000000000000000..24fab25687ff8130e61fc863648e13c4a94822cd --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_042235.log @@ -0,0 +1,41 @@ +2025-08-22 04:22:35 - INFO - Loading model: openbmb/MiniCPM-V-4-AWQ +2025-08-22 04:22:35 - INFO - vision_config is None, using default vision config +2025-08-22 04:22:37 - ERROR - 处理失败: +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 132, in main + model = MiniCPM(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/minicpm.py", line 13, in __init__ + self.model = AutoModel.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 593, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 317, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5023, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File 
"/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + [Previous line repeated 2 more times] + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 174, in replace_with_awq_linear + model._modules[name] = target_cls( + ^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/modules/linear/gemm.py", line 132, in __init__ + assert self.in_features % self.group_size == 0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +AssertionError diff --git a/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_042311.log b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_042311.log new file mode 100644 index 0000000000000000000000000000000000000000..9ac0b29beecc4e471369fb5efc21009a1952ba7b --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_042311.log @@ -0,0 +1,41 @@ +2025-08-22 04:23:11 - INFO - Loading model: openbmb/MiniCPM-V-4-AWQ +2025-08-22 04:23:12 - INFO - vision_config is None, using default vision config +2025-08-22 04:23:14 - ERROR - 处理失败: +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 132, in main 
+ model = MiniCPM(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/minicpm.py", line 13, in __init__ + self.model = AutoModel.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 593, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 317, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5023, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + [Previous line repeated 2 more times] + File 
"/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 174, in replace_with_awq_linear + model._modules[name] = target_cls( + ^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/modules/linear/gemm.py", line 132, in __init__ + assert self.in_features % self.group_size == 0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +AssertionError diff --git a/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_044310.log b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_044310.log new file mode 100644 index 0000000000000000000000000000000000000000..0d847acbeda4c5b19371245a228f8252a474a527 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_044310.log @@ -0,0 +1,41 @@ +2025-08-22 04:43:10 - INFO - Loading model: openbmb/MiniCPM-V-4-AWQ +2025-08-22 04:43:10 - INFO - vision_config is None, using default vision config +2025-08-22 04:43:13 - ERROR - 处理失败: +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 132, in main + model = MiniCPM(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/minicpm.py", line 13, in __init__ + self.model = AutoModel.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 593, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 317, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5023, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return 
self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + [Previous line repeated 2 more times] + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 174, in replace_with_awq_linear + model._modules[name] = target_cls( + ^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/modules/linear/gemm.py", line 132, in __init__ + assert self.in_features % self.group_size == 0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +AssertionError diff --git a/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_045314.log b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_045314.log new file mode 100644 index 0000000000000000000000000000000000000000..4db1a4ef2a943a8cfa7ab7bc51be85771542e922 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_045314.log @@ -0,0 +1,41 @@ +2025-08-22 04:53:14 - INFO - Loading model: openbmb/MiniCPM-V-4-AWQ +2025-08-22 04:53:14 - INFO - vision_config is None, using default vision config 
+2025-08-22 04:53:18 - ERROR - 处理失败: +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 133, in main + model = MiniCPM(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/minicpm.py", line 13, in __init__ + self.model = AutoModel.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 593, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 317, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5023, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in 
replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + [Previous line repeated 2 more times] + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 174, in replace_with_awq_linear + model._modules[name] = target_cls( + ^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/modules/linear/gemm.py", line 132, in __init__ + assert self.in_features % self.group_size == 0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +AssertionError diff --git a/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_045801.log b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_045801.log new file mode 100644 index 0000000000000000000000000000000000000000..62686a49ead4e4416fb43c23033c409f6b4f89a7 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_045801.log @@ -0,0 +1,41 @@ +2025-08-22 04:58:01 - INFO - Loading model: openbmb/MiniCPM-V-4-AWQ +2025-08-22 04:58:02 - INFO - vision_config is None, using default vision config +2025-08-22 04:58:04 - ERROR - 处理失败: +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 133, in main + model = MiniCPM(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/minicpm.py", line 13, in __init__ + self.model = AutoModel.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 593, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 317, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5023, in from_pretrained + hf_quantizer.preprocess_model( + File 
"/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + [Previous line repeated 2 more times] + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 174, in replace_with_awq_linear + model._modules[name] = target_cls( + ^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/modules/linear/gemm.py", line 132, in __init__ + assert self.in_features % self.group_size == 0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +AssertionError diff --git a/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_045903.log b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_045903.log new file mode 100644 index 0000000000000000000000000000000000000000..f4da26cbfeab29a41141d83ecd91bd09e4d01238 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_045903.log @@ -0,0 +1,41 @@ +2025-08-22 04:59:03 - 
INFO - Loading model: openbmb/MiniCPM-V-4-AWQ +2025-08-22 04:59:03 - INFO - vision_config is None, using default vision config +2025-08-22 04:59:06 - ERROR - 处理失败: +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 133, in main + model = MiniCPM(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/minicpm.py", line 13, in __init__ + self.model = AutoModel.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 593, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 317, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5023, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File 
"/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + [Previous line repeated 2 more times] + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 174, in replace_with_awq_linear + model._modules[name] = target_cls( + ^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/modules/linear/gemm.py", line 132, in __init__ + assert self.in_features % self.group_size == 0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +AssertionError diff --git a/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_045952.log b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_045952.log new file mode 100644 index 0000000000000000000000000000000000000000..bcf4f8a8ff11144e463d9cbb2f24238099060366 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_045952.log @@ -0,0 +1,41 @@ +2025-08-22 04:59:52 - INFO - Loading model: openbmb/MiniCPM-V-4-AWQ +2025-08-22 04:59:52 - INFO - vision_config is None, using default vision config +2025-08-22 04:59:54 - ERROR - 处理失败: +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 133, in main + model = MiniCPM(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/minicpm.py", line 13, in __init__ + self.model = AutoModel.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 593, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 317, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File 
"/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5023, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + [Previous line repeated 2 more times] + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 174, in replace_with_awq_linear + model._modules[name] = target_cls( + ^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/modules/linear/gemm.py", line 132, in __init__ + assert self.in_features % self.group_size == 0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +AssertionError diff --git a/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_050035.log b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_050035.log new file mode 100644 index 
0000000000000000000000000000000000000000..2d7d436322500d180f5e986cf7aa78ee1f602743 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_050035.log @@ -0,0 +1,41 @@ +2025-08-22 05:00:35 - INFO - Loading model: openbmb/MiniCPM-V-4-AWQ +2025-08-22 05:00:35 - INFO - vision_config is None, using default vision config +2025-08-22 05:00:37 - ERROR - 处理失败: +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 133, in main + model = MiniCPM(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/minicpm.py", line 13, in __init__ + self.model = AutoModel.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 593, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 317, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5023, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File 
"/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + [Previous line repeated 2 more times] + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 174, in replace_with_awq_linear + model._modules[name] = target_cls( + ^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/modules/linear/gemm.py", line 132, in __init__ + assert self.in_features % self.group_size == 0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +AssertionError diff --git a/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_050337.log b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_050337.log new file mode 100644 index 0000000000000000000000000000000000000000..3a33e56d77be1e2f96661ee23ae8599a086f278f --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_050337.log @@ -0,0 +1,41 @@ +2025-08-22 05:03:37 - INFO - Loading model: openbmb/MiniCPM-V-4-AWQ +2025-08-22 05:03:38 - INFO - vision_config is None, using default vision config +2025-08-22 05:03:41 - ERROR - 处理失败: +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 133, in main + model = MiniCPM(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/minicpm.py", line 13, in __init__ + self.model = AutoModel.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 593, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File 
"/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 317, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5028, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + [Previous line repeated 2 more times] + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 174, in replace_with_awq_linear + model._modules[name] = target_cls( + ^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/modules/linear/gemm.py", line 132, in __init__ + assert self.in_features % self.group_size == 0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +AssertionError diff --git 
a/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_050407.log b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_050407.log new file mode 100644 index 0000000000000000000000000000000000000000..bf907e91948ecdb7863dc75712ebd9342c484d67 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_050407.log @@ -0,0 +1,41 @@ +2025-08-22 05:04:07 - INFO - Loading model: openbmb/MiniCPM-V-4-AWQ +2025-08-22 05:04:07 - INFO - vision_config is None, using default vision config +2025-08-22 05:04:10 - ERROR - 处理失败: +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 133, in main + model = MiniCPM(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/minicpm.py", line 13, in __init__ + self.model = AutoModel.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 593, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 317, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5028, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File 
"/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + [Previous line repeated 2 more times] + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 174, in replace_with_awq_linear + model._modules[name] = target_cls( + ^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/modules/linear/gemm.py", line 132, in __init__ + assert self.in_features % self.group_size == 0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +AssertionError diff --git a/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_051302.log b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_051302.log new file mode 100644 index 0000000000000000000000000000000000000000..7ed5e2db3bb97ea2126d4fbe4ef40670839fdd57 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_051302.log @@ -0,0 +1,41 @@ +2025-08-22 05:13:02 - INFO - Loading model: openbmb/MiniCPM-V-4-AWQ +2025-08-22 05:13:02 - INFO - vision_config is None, using default vision config +2025-08-22 05:13:05 - ERROR - 处理失败: +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 133, in main + model = MiniCPM(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/minicpm.py", line 13, in __init__ + self.model = AutoModel.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File 
"/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 593, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 306, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5141, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + [Previous line repeated 2 more times] + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 174, in replace_with_awq_linear + model._modules[name] = target_cls( + ^^^^^^^^^^^ + File 
"/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/modules/linear/gemm.py", line 132, in __init__ + assert self.in_features % self.group_size == 0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +AssertionError diff --git a/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_051647.log b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_051647.log new file mode 100644 index 0000000000000000000000000000000000000000..12540f5b4a6dfbf66cf8bb78073b5994adfd1465 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_051647.log @@ -0,0 +1,41 @@ +2025-08-22 05:16:47 - INFO - Loading model: openbmb/MiniCPM-V-4-AWQ +2025-08-22 05:16:48 - INFO - vision_config is None, using default vision config +2025-08-22 05:16:50 - ERROR - 处理失败: +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 133, in main + model = MiniCPM(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/minicpm.py", line 13, in __init__ + self.model = AutoModel.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 593, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 306, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5141, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File 
"/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 187, in replace_with_awq_linear + _, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + [Previous line repeated 2 more times] + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 174, in replace_with_awq_linear + model._modules[name] = target_cls( + ^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/modules/linear/gemm.py", line 130, in __init__ + assert self.in_features % self.group_size == 0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +AssertionError diff --git a/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_051732.log b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_051732.log new file mode 100644 index 0000000000000000000000000000000000000000..809b9bed7e7352dae9514e940b59d43d013dae00 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-AWQ/20250822_051732.log @@ -0,0 +1,35 @@ +2025-08-22 05:17:32 - INFO - Loading model: openbmb/MiniCPM-V-4-AWQ +2025-08-22 05:17:33 - INFO - vision_config is None, using default vision config +2025-08-22 05:17:35 - ERROR - 处理失败: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils' 
(/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py) +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 133, in main + model = MiniCPM(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/minicpm.py", line 13, in __init__ + self.model = AutoModel.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py", line 593, in from_pretrained + return model_class.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 306, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5141, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 134, in replace_with_awq_linear + from awq.modules.linear.gemm import WQLinear_GEMM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/__init__.py", line 2, in + from awq.models.auto import AutoAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/__init__.py", line 1, in + from .mpt import MptAWQForCausalLM + File 
"/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/mpt.py", line 1, in + from .base import BaseAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/base.py", line 13, in + from transformers.modeling_utils import shard_checkpoint +ImportError: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils' (/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py) diff --git a/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_044952.log b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_044952.log new file mode 100644 index 0000000000000000000000000000000000000000..b9a43bf20662463305ed63fc780096026fcbd234 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_044952.log @@ -0,0 +1,36 @@ +2025-08-22 04:49:52 - INFO - Loading model: openbmb/MiniCPM-V-4-int4 +2025-08-22 04:49:54 - INFO - vision_config is None, using default vision config +2025-08-22 04:50:54 - INFO - Model loaded in 62.69 seconds +2025-08-22 04:50:54 - INFO - GPU Memory Usage after model load: 2689.45 MB +2025-08-22 04:50:54 - INFO - [d017fd76-3fd3-43ac-96a2-634c37d06506] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 04:50:54 - INFO - [d017fd76-3fd3-43ac-96a2-634c37d06506] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 04:50:55 - INFO - [d017fd76-3fd3-43ac-96a2-634c37d06506] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 04:50:56 - INFO - [d017fd76-3fd3-43ac-96a2-634c37d06506] 30 frames saved to temp_videos/d017fd76-3fd3-43ac-96a2-634c37d06506 +2025-08-22 04:51:15 - INFO - vision_config is None, using default vision config +2025-08-22 04:51:27 - INFO - Tokens per second: 5.480411872376593, Peak GPU memory MB: 9236.375 +2025-08-22 04:51:27 - INFO - [d017fd76-3fd3-43ac-96a2-634c37d06506] Inference time: 33.09 seconds, CPU usage: 48.6%, CPU core utilization: [46.3, 44.5, 50.7, 52.9] +2025-08-22 04:51:27 - INFO - [d017fd76-3fd3-43ac-96a2-634c37d06506] Cleaned up temporary frame directory: temp_videos/d017fd76-3fd3-43ac-96a2-634c37d06506 +2025-08-22 04:51:27 - INFO - [39153060-6451-46d2-bd79-3907b25078cd] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 04:51:27 - INFO - [39153060-6451-46d2-bd79-3907b25078cd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 04:51:32 - INFO - [39153060-6451-46d2-bd79-3907b25078cd] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 04:51:32 - INFO - [39153060-6451-46d2-bd79-3907b25078cd] 30 frames saved to temp_videos/39153060-6451-46d2-bd79-3907b25078cd +2025-08-22 04:51:45 - INFO - vision_config is None, using default vision config +2025-08-22 04:52:00 - INFO - Tokens per second: 7.228105259193224, Peak GPU memory MB: 9236.375 +2025-08-22 04:52:00 - INFO - [39153060-6451-46d2-bd79-3907b25078cd] Inference time: 32.26 seconds, CPU usage: 37.7%, CPU core utilization: [49.7, 33.2, 23.0, 45.0] +2025-08-22 04:52:00 - INFO - [39153060-6451-46d2-bd79-3907b25078cd] Cleaned up temporary frame directory: temp_videos/39153060-6451-46d2-bd79-3907b25078cd +2025-08-22 04:52:00 - INFO - [17f20038-cd68-4523-9ca8-e568483c3597] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 04:52:00 - INFO - [17f20038-cd68-4523-9ca8-e568483c3597] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 04:52:01 - INFO - [17f20038-cd68-4523-9ca8-e568483c3597] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 04:52:01 - INFO - [17f20038-cd68-4523-9ca8-e568483c3597] 30 frames saved to temp_videos/17f20038-cd68-4523-9ca8-e568483c3597 +2025-08-22 04:52:13 - INFO - vision_config is None, using default vision config +2025-08-22 04:52:24 - INFO - Tokens per second: 4.33591342429363, Peak GPU memory MB: 9236.375 +2025-08-22 04:52:24 - INFO - [17f20038-cd68-4523-9ca8-e568483c3597] Inference time: 24.36 seconds, CPU usage: 32.9%, CPU core utilization: [41.7, 14.4, 17.5, 58.0] +2025-08-22 04:52:24 - INFO - [17f20038-cd68-4523-9ca8-e568483c3597] Cleaned up temporary frame directory: temp_videos/17f20038-cd68-4523-9ca8-e568483c3597 +2025-08-22 04:52:24 - INFO - [00745cac-3194-4425-9e97-9a6df15d32b5] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 04:52:24 - INFO - [00745cac-3194-4425-9e97-9a6df15d32b5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 04:52:26 - INFO - [00745cac-3194-4425-9e97-9a6df15d32b5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 04:52:26 - INFO - [00745cac-3194-4425-9e97-9a6df15d32b5] 30 frames saved to temp_videos/00745cac-3194-4425-9e97-9a6df15d32b5 +2025-08-22 04:52:39 - INFO - vision_config is None, using default vision config +2025-08-22 04:52:55 - INFO - Tokens per second: 8.020485206497014, Peak GPU memory MB: 9236.375 +2025-08-22 04:52:55 - INFO - [00745cac-3194-4425-9e97-9a6df15d32b5] Inference time: 31.39 seconds, CPU usage: 42.2%, CPU core utilization: [23.0, 30.6, 48.4, 66.5] +2025-08-22 04:52:55 - INFO - [00745cac-3194-4425-9e97-9a6df15d32b5] Cleaned up temporary frame directory: temp_videos/00745cac-3194-4425-9e97-9a6df15d32b5 diff --git a/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_051942.log b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_051942.log new file mode 100644 index 0000000000000000000000000000000000000000..22f1bb22e5c523027672206e1ecee2703367bbb1 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_051942.log @@ -0,0 +1,10 @@ +2025-08-22 05:19:42 - INFO - Loading model: openbmb/MiniCPM-V-4-int4 +2025-08-22 05:19:43 - INFO - vision_config is None, using default vision config +2025-08-22 05:20:01 - INFO - Model loaded in 18.83 seconds +2025-08-22 05:20:01 - INFO - GPU Memory Usage after model load: 2694.64 MB +2025-08-22 05:20:01 - INFO - [7c036f1b-ec9f-472a-9e97-2900176c83b1] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:20:01 - INFO - [7c036f1b-ec9f-472a-9e97-2900176c83b1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:20:02 - INFO - [7c036f1b-ec9f-472a-9e97-2900176c83b1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:20:02 - INFO - [7c036f1b-ec9f-472a-9e97-2900176c83b1] 30 frames saved to temp_videos/7c036f1b-ec9f-472a-9e97-2900176c83b1 +2025-08-22 05:20:17 - INFO - vision_config is None, using default vision config +2025-08-22 05:20:45 - INFO - [7c036f1b-ec9f-472a-9e97-2900176c83b1] Cleaned up temporary frame directory: temp_videos/7c036f1b-ec9f-472a-9e97-2900176c83b1 diff --git a/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_052134.log b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_052134.log new file mode 100644 index 0000000000000000000000000000000000000000..3a4192284b53aa1a1205d59e0287d44c5e60c057 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_052134.log @@ -0,0 +1,10 @@ +2025-08-22 05:21:34 - INFO - Loading model: openbmb/MiniCPM-V-4-int4 +2025-08-22 05:21:34 - INFO - vision_config is None, using default vision config +2025-08-22 05:21:41 - INFO - Model loaded in 6.88 seconds +2025-08-22 05:21:41 - INFO - GPU Memory Usage after model load: 2694.64 MB +2025-08-22 05:21:41 - INFO - [b1130834-0115-4a56-831d-192b1629d030] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:21:41 - INFO - [b1130834-0115-4a56-831d-192b1629d030] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:21:42 - INFO - [b1130834-0115-4a56-831d-192b1629d030] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:21:42 - INFO - [b1130834-0115-4a56-831d-192b1629d030] 30 frames saved to temp_videos/b1130834-0115-4a56-831d-192b1629d030 +2025-08-22 05:21:56 - INFO - vision_config is None, using default vision config +2025-08-22 05:22:16 - INFO - [b1130834-0115-4a56-831d-192b1629d030] Cleaned up temporary frame directory: temp_videos/b1130834-0115-4a56-831d-192b1629d030 diff --git a/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_052912.log b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_052912.log new file mode 100644 index 0000000000000000000000000000000000000000..9ca60f89ffc7bbf27f07eaebb4f8de8e0df46199 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_052912.log @@ -0,0 +1,10 @@ +2025-08-22 05:29:12 - INFO - Loading model: openbmb/MiniCPM-V-4-int4 +2025-08-22 05:29:13 - INFO - vision_config is None, using default vision config +2025-08-22 05:29:19 - INFO - Model loaded in 7.09 seconds +2025-08-22 05:29:19 - INFO - GPU Memory Usage after model load: 2694.64 MB +2025-08-22 05:29:19 - INFO - [e19fb6cb-1139-4db6-b550-2445cbae2036] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:29:19 - INFO - [e19fb6cb-1139-4db6-b550-2445cbae2036] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:29:20 - INFO - [e19fb6cb-1139-4db6-b550-2445cbae2036] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:29:20 - INFO - [e19fb6cb-1139-4db6-b550-2445cbae2036] 30 frames saved to temp_videos/e19fb6cb-1139-4db6-b550-2445cbae2036 +2025-08-22 05:29:34 - INFO - vision_config is None, using default vision config +2025-08-22 05:30:35 - INFO - [e19fb6cb-1139-4db6-b550-2445cbae2036] Cleaned up temporary frame directory: temp_videos/e19fb6cb-1139-4db6-b550-2445cbae2036 diff --git a/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_055248.log b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_055248.log new file mode 100644 index 0000000000000000000000000000000000000000..209129a97295a5b9d52182b9d54a31d5e862aaa7 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_055248.log @@ -0,0 +1,44 @@ +2025-08-22 05:52:48 - INFO - Loading model: openbmb/MiniCPM-V-4-int4 +2025-08-22 05:52:49 - INFO - vision_config is None, using default vision config +2025-08-22 05:53:14 - INFO - Model loaded in 25.68 seconds +2025-08-22 05:53:14 - INFO - GPU Memory Usage after model load: 2689.45 MB +2025-08-22 05:53:14 - INFO - [d42855a3-8e48-4035-a53f-8de3090c06ed] Processing video: 'videos/sample2.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:53:14 - INFO - [d42855a3-8e48-4035-a53f-8de3090c06ed] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:53:17 - INFO - [d42855a3-8e48-4035-a53f-8de3090c06ed] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:53:17 - INFO - [d42855a3-8e48-4035-a53f-8de3090c06ed] 30 frames saved to temp_videos/d42855a3-8e48-4035-a53f-8de3090c06ed +2025-08-22 05:53:34 - INFO - vision_config is None, using default vision config +2025-08-22 05:53:58 - INFO - Tokens per second: 9.627250655320038, Peak GPU memory MB: 9236.375 +2025-08-22 05:53:58 - INFO - [d42855a3-8e48-4035-a53f-8de3090c06ed] Inference time: 44.06 seconds, CPU usage: 23.0%, CPU core utilization: [16.7, 19.6, 22.1, 33.5] +2025-08-22 05:53:58 - INFO - [d42855a3-8e48-4035-a53f-8de3090c06ed] Cleaned up temporary frame directory: temp_videos/d42855a3-8e48-4035-a53f-8de3090c06ed +2025-08-22 05:53:58 - INFO - [ac79ae0e-7b1a-40bf-b620-f6c90f5770d6] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:53:58 - INFO - [ac79ae0e-7b1a-40bf-b620-f6c90f5770d6] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:53:59 - INFO - [ac79ae0e-7b1a-40bf-b620-f6c90f5770d6] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:53:59 - INFO - [ac79ae0e-7b1a-40bf-b620-f6c90f5770d6] 30 frames saved to temp_videos/ac79ae0e-7b1a-40bf-b620-f6c90f5770d6 +2025-08-22 05:54:12 - INFO - vision_config is None, using default vision config +2025-08-22 05:54:21 - INFO - Tokens per second: 3.57929410621367, Peak GPU memory MB: 9236.375 +2025-08-22 05:54:21 - INFO - [ac79ae0e-7b1a-40bf-b620-f6c90f5770d6] Inference time: 23.26 seconds, CPU usage: 29.8%, CPU core utilization: [27.5, 64.4, 7.3, 19.8] +2025-08-22 05:54:21 - INFO - [ac79ae0e-7b1a-40bf-b620-f6c90f5770d6] Cleaned up temporary frame directory: temp_videos/ac79ae0e-7b1a-40bf-b620-f6c90f5770d6 +2025-08-22 05:54:21 - INFO - [304d70b6-ebf9-4d42-b6c7-d5933fe8b991] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:54:21 - INFO - [304d70b6-ebf9-4d42-b6c7-d5933fe8b991] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:54:26 - INFO - [304d70b6-ebf9-4d42-b6c7-d5933fe8b991] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:54:26 - INFO - [304d70b6-ebf9-4d42-b6c7-d5933fe8b991] 30 frames saved to temp_videos/304d70b6-ebf9-4d42-b6c7-d5933fe8b991 +2025-08-22 05:54:39 - INFO - vision_config is None, using default vision config +2025-08-22 05:54:54 - INFO - Tokens per second: 7.079099419479299, Peak GPU memory MB: 9236.375 +2025-08-22 05:54:54 - INFO - [304d70b6-ebf9-4d42-b6c7-d5933fe8b991] Inference time: 32.54 seconds, CPU usage: 37.9%, CPU core utilization: [18.0, 24.0, 55.7, 54.0] +2025-08-22 05:54:54 - INFO - [304d70b6-ebf9-4d42-b6c7-d5933fe8b991] Cleaned up temporary frame directory: temp_videos/304d70b6-ebf9-4d42-b6c7-d5933fe8b991 +2025-08-22 05:54:54 - INFO - [ef3c69d1-c1a7-425a-a488-809429a78f15] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:54:54 - INFO - [ef3c69d1-c1a7-425a-a488-809429a78f15] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:54:54 - INFO - [ef3c69d1-c1a7-425a-a488-809429a78f15] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:54:55 - INFO - [ef3c69d1-c1a7-425a-a488-809429a78f15] 30 frames saved to temp_videos/ef3c69d1-c1a7-425a-a488-809429a78f15 +2025-08-22 05:55:07 - INFO - vision_config is None, using default vision config +2025-08-22 05:55:18 - INFO - Tokens per second: 4.668429203015315, Peak GPU memory MB: 9236.375 +2025-08-22 05:55:18 - INFO - [ef3c69d1-c1a7-425a-a488-809429a78f15] Inference time: 24.50 seconds, CPU usage: 29.9%, CPU core utilization: [36.4, 27.0, 24.5, 31.9] +2025-08-22 05:55:18 - INFO - [ef3c69d1-c1a7-425a-a488-809429a78f15] Cleaned up temporary frame directory: temp_videos/ef3c69d1-c1a7-425a-a488-809429a78f15 +2025-08-22 05:55:18 - INFO - [d836cd40-b5dc-4621-8bea-ae0762c669fa] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:55:18 - INFO - [d836cd40-b5dc-4621-8bea-ae0762c669fa] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:55:19 - INFO - [d836cd40-b5dc-4621-8bea-ae0762c669fa] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:55:19 - INFO - [d836cd40-b5dc-4621-8bea-ae0762c669fa] 30 frames saved to temp_videos/d836cd40-b5dc-4621-8bea-ae0762c669fa +2025-08-22 05:55:32 - INFO - vision_config is None, using default vision config +2025-08-22 05:55:43 - INFO - Tokens per second: 5.1673857196625175, Peak GPU memory MB: 9236.375 +2025-08-22 05:55:43 - INFO - [d836cd40-b5dc-4621-8bea-ae0762c669fa] Inference time: 25.01 seconds, CPU usage: 30.0%, CPU core utilization: [53.4, 44.7, 12.5, 9.1] +2025-08-22 05:55:43 - INFO - [d836cd40-b5dc-4621-8bea-ae0762c669fa] Cleaned up temporary frame directory: temp_videos/d836cd40-b5dc-4621-8bea-ae0762c669fa diff --git a/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_110321.log b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_110321.log new file mode 100644 index 0000000000000000000000000000000000000000..9894cfdd9a8e42c50ecc1f1f3fc078cf98a39603 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_110321.log @@ -0,0 +1,44 @@ +2025-08-22 11:03:21 - INFO - Loading model: openbmb/MiniCPM-V-4-int4 +2025-08-22 11:03:22 - INFO - vision_config is None, using default vision config +2025-08-22 11:03:48 - INFO - Model loaded in 26.75 seconds +2025-08-22 11:03:48 - INFO - GPU Memory Usage after model load: 2689.45 MB +2025-08-22 11:03:48 - INFO - [850d2e5c-0cec-493c-8ce6-c55f9b80e685] Processing video: 'videos/sample2.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:03:48 - INFO - [850d2e5c-0cec-493c-8ce6-c55f9b80e685] Extracting frames using method: uniform, rate/threshold: 5 +2025-08-22 11:03:49 - INFO - [850d2e5c-0cec-493c-8ce6-c55f9b80e685] Extracted 5 frames successfully. Saving to temporary files... 
+2025-08-22 11:03:49 - INFO - [850d2e5c-0cec-493c-8ce6-c55f9b80e685] 5 frames saved to temp_videos/850d2e5c-0cec-493c-8ce6-c55f9b80e685 +2025-08-22 11:03:56 - INFO - vision_config is None, using default vision config +2025-08-22 11:04:05 - INFO - Tokens per second: 11.493479553556993, Peak GPU memory MB: 5852.375 +2025-08-22 11:04:05 - INFO - [850d2e5c-0cec-493c-8ce6-c55f9b80e685] Inference time: 17.25 seconds, CPU usage: 44.5%, CPU core utilization: [52.4, 44.4, 41.4, 39.7] +2025-08-22 11:04:05 - INFO - [850d2e5c-0cec-493c-8ce6-c55f9b80e685] Cleaned up temporary frame directory: temp_videos/850d2e5c-0cec-493c-8ce6-c55f9b80e685 +2025-08-22 11:04:05 - INFO - [60f24278-4a3e-40a5-b6ca-91d948dccefd] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:04:05 - INFO - [60f24278-4a3e-40a5-b6ca-91d948dccefd] Extracting frames using method: uniform, rate/threshold: 5 +2025-08-22 11:04:05 - INFO - [60f24278-4a3e-40a5-b6ca-91d948dccefd] Extracted 5 frames successfully. Saving to temporary files... 
+2025-08-22 11:04:05 - INFO - [60f24278-4a3e-40a5-b6ca-91d948dccefd] 5 frames saved to temp_videos/60f24278-4a3e-40a5-b6ca-91d948dccefd +2025-08-22 11:04:07 - INFO - vision_config is None, using default vision config +2025-08-22 11:04:17 - INFO - Tokens per second: 12.456803223684428, Peak GPU memory MB: 5852.375 +2025-08-22 11:04:17 - INFO - [60f24278-4a3e-40a5-b6ca-91d948dccefd] Inference time: 11.52 seconds, CPU usage: 28.2%, CPU core utilization: [7.4, 39.4, 19.4, 46.8] +2025-08-22 11:04:17 - INFO - [60f24278-4a3e-40a5-b6ca-91d948dccefd] Cleaned up temporary frame directory: temp_videos/60f24278-4a3e-40a5-b6ca-91d948dccefd +2025-08-22 11:04:17 - INFO - [69a97693-6053-4c05-946f-6feb3c853a6f] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:04:17 - INFO - [69a97693-6053-4c05-946f-6feb3c853a6f] Extracting frames using method: uniform, rate/threshold: 5 +2025-08-22 11:04:17 - INFO - [69a97693-6053-4c05-946f-6feb3c853a6f] Extracted 5 frames successfully. Saving to temporary files... 
+2025-08-22 11:04:17 - INFO - [69a97693-6053-4c05-946f-6feb3c853a6f] 5 frames saved to temp_videos/69a97693-6053-4c05-946f-6feb3c853a6f +2025-08-22 11:04:19 - INFO - vision_config is None, using default vision config +2025-08-22 11:04:27 - INFO - Tokens per second: 12.085148643257336, Peak GPU memory MB: 5852.375 +2025-08-22 11:04:27 - INFO - [69a97693-6053-4c05-946f-6feb3c853a6f] Inference time: 10.63 seconds, CPU usage: 31.5%, CPU core utilization: [35.8, 14.0, 22.2, 54.1] +2025-08-22 11:04:27 - INFO - [69a97693-6053-4c05-946f-6feb3c853a6f] Cleaned up temporary frame directory: temp_videos/69a97693-6053-4c05-946f-6feb3c853a6f +2025-08-22 11:04:27 - INFO - [79c77958-73b6-448a-b016-c5817b5973f1] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:04:27 - INFO - [79c77958-73b6-448a-b016-c5817b5973f1] Extracting frames using method: uniform, rate/threshold: 5 +2025-08-22 11:04:27 - INFO - [79c77958-73b6-448a-b016-c5817b5973f1] Extracted 5 frames successfully. Saving to temporary files... 
+2025-08-22 11:04:27 - INFO - [79c77958-73b6-448a-b016-c5817b5973f1] 5 frames saved to temp_videos/79c77958-73b6-448a-b016-c5817b5973f1 +2025-08-22 11:04:30 - INFO - vision_config is None, using default vision config +2025-08-22 11:04:36 - INFO - Tokens per second: 11.539003114652623, Peak GPU memory MB: 5852.375 +2025-08-22 11:04:36 - INFO - [79c77958-73b6-448a-b016-c5817b5973f1] Inference time: 8.36 seconds, CPU usage: 28.5%, CPU core utilization: [7.8, 59.5, 26.3, 20.3] +2025-08-22 11:04:36 - INFO - [79c77958-73b6-448a-b016-c5817b5973f1] Cleaned up temporary frame directory: temp_videos/79c77958-73b6-448a-b016-c5817b5973f1 +2025-08-22 11:04:36 - INFO - [702f19f1-1586-4d4d-8814-d3d4a9d67561] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:04:36 - INFO - [702f19f1-1586-4d4d-8814-d3d4a9d67561] Extracting frames using method: uniform, rate/threshold: 5 +2025-08-22 11:04:36 - INFO - [702f19f1-1586-4d4d-8814-d3d4a9d67561] Extracted 5 frames successfully. Saving to temporary files... 
+2025-08-22 11:04:36 - INFO - [702f19f1-1586-4d4d-8814-d3d4a9d67561] 5 frames saved to temp_videos/702f19f1-1586-4d4d-8814-d3d4a9d67561 +2025-08-22 11:04:38 - INFO - vision_config is None, using default vision config +2025-08-22 11:04:47 - INFO - Tokens per second: 12.248593250808584, Peak GPU memory MB: 5852.375 +2025-08-22 11:04:47 - INFO - [702f19f1-1586-4d4d-8814-d3d4a9d67561] Inference time: 11.68 seconds, CPU usage: 29.9%, CPU core utilization: [23.1, 18.2, 14.5, 63.9] +2025-08-22 11:04:47 - INFO - [702f19f1-1586-4d4d-8814-d3d4a9d67561] Cleaned up temporary frame directory: temp_videos/702f19f1-1586-4d4d-8814-d3d4a9d67561 diff --git a/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_110454.log b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_110454.log new file mode 100644 index 0000000000000000000000000000000000000000..bfa9b6977c808126864bed953106c9762e845a7f --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_110454.log @@ -0,0 +1,44 @@ +2025-08-22 11:04:54 - INFO - Loading model: openbmb/MiniCPM-V-4-int4 +2025-08-22 11:04:54 - INFO - vision_config is None, using default vision config +2025-08-22 11:05:01 - INFO - Model loaded in 7.54 seconds +2025-08-22 11:05:01 - INFO - GPU Memory Usage after model load: 2689.45 MB +2025-08-22 11:05:01 - INFO - [2a91f41b-eac8-4c88-9b5e-9707198412eb] Processing video: 'videos/sample2.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:05:01 - INFO - [2a91f41b-eac8-4c88-9b5e-9707198412eb] Extracting frames using method: uniform, rate/threshold: 10 +2025-08-22 11:05:03 - INFO - [2a91f41b-eac8-4c88-9b5e-9707198412eb] Extracted 10 frames successfully. Saving to temporary files... 
+2025-08-22 11:05:03 - INFO - [2a91f41b-eac8-4c88-9b5e-9707198412eb] 10 frames saved to temp_videos/2a91f41b-eac8-4c88-9b5e-9707198412eb +2025-08-22 11:05:12 - INFO - vision_config is None, using default vision config +2025-08-22 11:05:23 - INFO - Tokens per second: 9.96703162358834, Peak GPU memory MB: 7752.375 +2025-08-22 11:05:23 - INFO - [2a91f41b-eac8-4c88-9b5e-9707198412eb] Inference time: 21.65 seconds, CPU usage: 68.2%, CPU core utilization: [66.8, 69.2, 64.3, 72.7] +2025-08-22 11:05:23 - INFO - [2a91f41b-eac8-4c88-9b5e-9707198412eb] Cleaned up temporary frame directory: temp_videos/2a91f41b-eac8-4c88-9b5e-9707198412eb +2025-08-22 11:05:23 - INFO - [41eac123-16c5-4d05-acfd-9b7ecd52e1e1] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:05:23 - INFO - [41eac123-16c5-4d05-acfd-9b7ecd52e1e1] Extracting frames using method: uniform, rate/threshold: 10 +2025-08-22 11:05:23 - INFO - [41eac123-16c5-4d05-acfd-9b7ecd52e1e1] Extracted 10 frames successfully. Saving to temporary files... 
+2025-08-22 11:05:23 - INFO - [41eac123-16c5-4d05-acfd-9b7ecd52e1e1] 10 frames saved to temp_videos/41eac123-16c5-4d05-acfd-9b7ecd52e1e1 +2025-08-22 11:05:28 - INFO - vision_config is None, using default vision config +2025-08-22 11:05:34 - INFO - Tokens per second: 9.183890722932835, Peak GPU memory MB: 7752.375 +2025-08-22 11:05:34 - INFO - [41eac123-16c5-4d05-acfd-9b7ecd52e1e1] Inference time: 10.76 seconds, CPU usage: 29.2%, CPU core utilization: [13.0, 31.1, 6.2, 66.8] +2025-08-22 11:05:34 - INFO - [41eac123-16c5-4d05-acfd-9b7ecd52e1e1] Cleaned up temporary frame directory: temp_videos/41eac123-16c5-4d05-acfd-9b7ecd52e1e1 +2025-08-22 11:05:34 - INFO - [671f2493-4cbc-4854-96ac-3711a296d91d] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:05:34 - INFO - [671f2493-4cbc-4854-96ac-3711a296d91d] Extracting frames using method: uniform, rate/threshold: 10 +2025-08-22 11:05:35 - INFO - [671f2493-4cbc-4854-96ac-3711a296d91d] Extracted 10 frames successfully. Saving to temporary files... 
+2025-08-22 11:05:35 - INFO - [671f2493-4cbc-4854-96ac-3711a296d91d] 10 frames saved to temp_videos/671f2493-4cbc-4854-96ac-3711a296d91d +2025-08-22 11:05:40 - INFO - vision_config is None, using default vision config +2025-08-22 11:06:04 - INFO - Tokens per second: 12.75781937284626, Peak GPU memory MB: 7762.375 +2025-08-22 11:06:04 - INFO - [671f2493-4cbc-4854-96ac-3711a296d91d] Inference time: 30.21 seconds, CPU usage: 30.8%, CPU core utilization: [13.1, 40.8, 59.1, 9.9] +2025-08-22 11:06:04 - INFO - [671f2493-4cbc-4854-96ac-3711a296d91d] Cleaned up temporary frame directory: temp_videos/671f2493-4cbc-4854-96ac-3711a296d91d +2025-08-22 11:06:04 - INFO - [a2c33b38-df7f-4f96-ac5f-85d8c75507ee] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:06:04 - INFO - [a2c33b38-df7f-4f96-ac5f-85d8c75507ee] Extracting frames using method: uniform, rate/threshold: 10 +2025-08-22 11:06:04 - INFO - [a2c33b38-df7f-4f96-ac5f-85d8c75507ee] Extracted 10 frames successfully. Saving to temporary files... 
+2025-08-22 11:06:04 - INFO - [a2c33b38-df7f-4f96-ac5f-85d8c75507ee] 10 frames saved to temp_videos/a2c33b38-df7f-4f96-ac5f-85d8c75507ee +2025-08-22 11:06:09 - INFO - vision_config is None, using default vision config +2025-08-22 11:06:18 - INFO - Tokens per second: 11.150383906582048, Peak GPU memory MB: 7762.375 +2025-08-22 11:06:18 - INFO - [a2c33b38-df7f-4f96-ac5f-85d8c75507ee] Inference time: 14.53 seconds, CPU usage: 28.4%, CPU core utilization: [43.1, 5.1, 32.5, 32.6] +2025-08-22 11:06:19 - INFO - [a2c33b38-df7f-4f96-ac5f-85d8c75507ee] Cleaned up temporary frame directory: temp_videos/a2c33b38-df7f-4f96-ac5f-85d8c75507ee +2025-08-22 11:06:19 - INFO - [a590da60-1f05-494d-b772-a95b92185462] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:06:19 - INFO - [a590da60-1f05-494d-b772-a95b92185462] Extracting frames using method: uniform, rate/threshold: 10 +2025-08-22 11:06:19 - INFO - [a590da60-1f05-494d-b772-a95b92185462] Extracted 10 frames successfully. Saving to temporary files... 
+2025-08-22 11:06:19 - INFO - [a590da60-1f05-494d-b772-a95b92185462] 10 frames saved to temp_videos/a590da60-1f05-494d-b772-a95b92185462 +2025-08-22 11:06:23 - INFO - vision_config is None, using default vision config +2025-08-22 11:06:29 - INFO - Tokens per second: 8.76210837152812, Peak GPU memory MB: 7762.375 +2025-08-22 11:06:29 - INFO - [a590da60-1f05-494d-b772-a95b92185462] Inference time: 10.51 seconds, CPU usage: 29.3%, CPU core utilization: [7.8, 96.6, 5.7, 6.4] +2025-08-22 11:06:29 - INFO - [a590da60-1f05-494d-b772-a95b92185462] Cleaned up temporary frame directory: temp_videos/a590da60-1f05-494d-b772-a95b92185462 diff --git a/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_110635.log b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_110635.log new file mode 100644 index 0000000000000000000000000000000000000000..649c9be89d55ff9b688862cf69291e0bf77cbfe0 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_110635.log @@ -0,0 +1,44 @@ +2025-08-22 11:06:35 - INFO - Loading model: openbmb/MiniCPM-V-4-int4 +2025-08-22 11:06:36 - INFO - vision_config is None, using default vision config +2025-08-22 11:06:41 - INFO - Model loaded in 6.33 seconds +2025-08-22 11:06:41 - INFO - GPU Memory Usage after model load: 2689.45 MB +2025-08-22 11:06:41 - INFO - [9c79b8e0-c117-48ce-87d7-0f69fe4b813f] Processing video: 'videos/sample2.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:06:41 - INFO - [9c79b8e0-c117-48ce-87d7-0f69fe4b813f] Extracting frames using method: uniform, rate/threshold: 20 +2025-08-22 11:06:44 - INFO - [9c79b8e0-c117-48ce-87d7-0f69fe4b813f] Extracted 20 frames successfully. Saving to temporary files... 
+2025-08-22 11:06:44 - INFO - [9c79b8e0-c117-48ce-87d7-0f69fe4b813f] 20 frames saved to temp_videos/9c79b8e0-c117-48ce-87d7-0f69fe4b813f +2025-08-22 11:06:58 - INFO - vision_config is None, using default vision config +2025-08-22 11:07:09 - INFO - Tokens per second: 8.219991139097228, Peak GPU memory MB: 9422.375 +2025-08-22 11:07:09 - INFO - [9c79b8e0-c117-48ce-87d7-0f69fe4b813f] Inference time: 27.53 seconds, CPU usage: 54.8%, CPU core utilization: [47.4, 63.2, 44.9, 63.6] +2025-08-22 11:07:09 - INFO - [9c79b8e0-c117-48ce-87d7-0f69fe4b813f] Cleaned up temporary frame directory: temp_videos/9c79b8e0-c117-48ce-87d7-0f69fe4b813f +2025-08-22 11:07:09 - INFO - [930de0f2-3168-402b-bf54-ae4b35e66d88] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:07:09 - INFO - [930de0f2-3168-402b-bf54-ae4b35e66d88] Extracting frames using method: uniform, rate/threshold: 20 +2025-08-22 11:07:10 - INFO - [930de0f2-3168-402b-bf54-ae4b35e66d88] Extracted 20 frames successfully. Saving to temporary files... 
+2025-08-22 11:07:10 - INFO - [930de0f2-3168-402b-bf54-ae4b35e66d88] 20 frames saved to temp_videos/930de0f2-3168-402b-bf54-ae4b35e66d88 +2025-08-22 11:07:18 - INFO - vision_config is None, using default vision config +2025-08-22 11:07:26 - INFO - Tokens per second: 6.727455931746866, Peak GPU memory MB: 9422.375 +2025-08-22 11:07:26 - INFO - [930de0f2-3168-402b-bf54-ae4b35e66d88] Inference time: 17.58 seconds, CPU usage: 29.4%, CPU core utilization: [36.6, 7.3, 68.2, 5.1] +2025-08-22 11:07:26 - INFO - [930de0f2-3168-402b-bf54-ae4b35e66d88] Cleaned up temporary frame directory: temp_videos/930de0f2-3168-402b-bf54-ae4b35e66d88 +2025-08-22 11:07:26 - INFO - [94b78871-ffda-4079-8563-cba6458604e0] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:07:26 - INFO - [94b78871-ffda-4079-8563-cba6458604e0] Extracting frames using method: uniform, rate/threshold: 20 +2025-08-22 11:07:29 - INFO - [94b78871-ffda-4079-8563-cba6458604e0] Extracted 20 frames successfully. Saving to temporary files... 
+2025-08-22 11:07:29 - INFO - [94b78871-ffda-4079-8563-cba6458604e0] 20 frames saved to temp_videos/94b78871-ffda-4079-8563-cba6458604e0 +2025-08-22 11:07:38 - INFO - vision_config is None, using default vision config +2025-08-22 11:07:46 - INFO - Tokens per second: 6.369975503038201, Peak GPU memory MB: 9422.375 +2025-08-22 11:07:46 - INFO - [94b78871-ffda-4079-8563-cba6458604e0] Inference time: 19.86 seconds, CPU usage: 38.2%, CPU core utilization: [37.0, 40.9, 21.5, 53.7] +2025-08-22 11:07:46 - INFO - [94b78871-ffda-4079-8563-cba6458604e0] Cleaned up temporary frame directory: temp_videos/94b78871-ffda-4079-8563-cba6458604e0 +2025-08-22 11:07:46 - INFO - [941b1262-a196-4fea-80d5-3190ee638fe5] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:07:46 - INFO - [941b1262-a196-4fea-80d5-3190ee638fe5] Extracting frames using method: uniform, rate/threshold: 20 +2025-08-22 11:07:47 - INFO - [941b1262-a196-4fea-80d5-3190ee638fe5] Extracted 20 frames successfully. Saving to temporary files... 
+2025-08-22 11:07:47 - INFO - [941b1262-a196-4fea-80d5-3190ee638fe5] 20 frames saved to temp_videos/941b1262-a196-4fea-80d5-3190ee638fe5 +2025-08-22 11:07:56 - INFO - vision_config is None, using default vision config +2025-08-22 11:08:04 - INFO - Tokens per second: 6.581258041607729, Peak GPU memory MB: 9422.375 +2025-08-22 11:08:04 - INFO - [941b1262-a196-4fea-80d5-3190ee638fe5] Inference time: 17.51 seconds, CPU usage: 29.6%, CPU core utilization: [27.6, 18.4, 31.1, 41.1] +2025-08-22 11:08:04 - INFO - [941b1262-a196-4fea-80d5-3190ee638fe5] Cleaned up temporary frame directory: temp_videos/941b1262-a196-4fea-80d5-3190ee638fe5 +2025-08-22 11:08:04 - INFO - [3f721ab9-2e0c-4b0e-a760-9ded5da8519b] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:08:04 - INFO - [3f721ab9-2e0c-4b0e-a760-9ded5da8519b] Extracting frames using method: uniform, rate/threshold: 20 +2025-08-22 11:08:04 - INFO - [3f721ab9-2e0c-4b0e-a760-9ded5da8519b] Extracted 20 frames successfully. Saving to temporary files... 
+2025-08-22 11:08:05 - INFO - [3f721ab9-2e0c-4b0e-a760-9ded5da8519b] 20 frames saved to temp_videos/3f721ab9-2e0c-4b0e-a760-9ded5da8519b +2025-08-22 11:08:13 - INFO - vision_config is None, using default vision config +2025-08-22 11:08:21 - INFO - Tokens per second: 6.564398995813938, Peak GPU memory MB: 9422.375 +2025-08-22 11:08:21 - INFO - [3f721ab9-2e0c-4b0e-a760-9ded5da8519b] Inference time: 17.65 seconds, CPU usage: 29.3%, CPU core utilization: [6.9, 23.7, 5.5, 80.6] +2025-08-22 11:08:21 - INFO - [3f721ab9-2e0c-4b0e-a760-9ded5da8519b] Cleaned up temporary frame directory: temp_videos/3f721ab9-2e0c-4b0e-a760-9ded5da8519b diff --git a/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_110827.log b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_110827.log new file mode 100644 index 0000000000000000000000000000000000000000..0d5ae6acbdb839be1b77846942cba3db6a72f471 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_110827.log @@ -0,0 +1,44 @@ +2025-08-22 11:08:27 - INFO - Loading model: openbmb/MiniCPM-V-4-int4 +2025-08-22 11:08:27 - INFO - vision_config is None, using default vision config +2025-08-22 11:08:34 - INFO - Model loaded in 7.23 seconds +2025-08-22 11:08:34 - INFO - GPU Memory Usage after model load: 2689.45 MB +2025-08-22 11:08:34 - INFO - [08a603cf-ef2a-43b7-8a57-51d776507de1] Processing video: 'videos/sample2.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:08:34 - INFO - [08a603cf-ef2a-43b7-8a57-51d776507de1] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 11:08:39 - INFO - [08a603cf-ef2a-43b7-8a57-51d776507de1] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 11:08:39 - INFO - [08a603cf-ef2a-43b7-8a57-51d776507de1] 30 frames saved to temp_videos/08a603cf-ef2a-43b7-8a57-51d776507de1 +2025-08-22 11:08:56 - INFO - vision_config is None, using default vision config +2025-08-22 11:09:09 - INFO - Tokens per second: 6.54134801287868, Peak GPU memory MB: 9354.375 +2025-08-22 11:09:09 - INFO - [08a603cf-ef2a-43b7-8a57-51d776507de1] Inference time: 35.41 seconds, CPU usage: 60.7%, CPU core utilization: [50.1, 53.8, 54.9, 84.1] +2025-08-22 11:09:09 - INFO - [08a603cf-ef2a-43b7-8a57-51d776507de1] Cleaned up temporary frame directory: temp_videos/08a603cf-ef2a-43b7-8a57-51d776507de1 +2025-08-22 11:09:09 - INFO - [10420b35-ed99-4649-95f4-1c15ec853a25] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:09:09 - INFO - [10420b35-ed99-4649-95f4-1c15ec853a25] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 11:09:10 - INFO - [10420b35-ed99-4649-95f4-1c15ec853a25] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 11:09:10 - INFO - [10420b35-ed99-4649-95f4-1c15ec853a25] 30 frames saved to temp_videos/10420b35-ed99-4649-95f4-1c15ec853a25 +2025-08-22 11:09:23 - INFO - vision_config is None, using default vision config +2025-08-22 11:09:36 - INFO - Tokens per second: 6.748738122493609, Peak GPU memory MB: 9354.375 +2025-08-22 11:09:36 - INFO - [10420b35-ed99-4649-95f4-1c15ec853a25] Inference time: 27.12 seconds, CPU usage: 29.2%, CPU core utilization: [21.0, 23.0, 51.7, 20.9] +2025-08-22 11:09:36 - INFO - [10420b35-ed99-4649-95f4-1c15ec853a25] Cleaned up temporary frame directory: temp_videos/10420b35-ed99-4649-95f4-1c15ec853a25 +2025-08-22 11:09:36 - INFO - [f7bceeb1-1e06-49db-becd-f87ec9ca6f0c] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:09:36 - INFO - [f7bceeb1-1e06-49db-becd-f87ec9ca6f0c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 11:09:41 - INFO - [f7bceeb1-1e06-49db-becd-f87ec9ca6f0c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 11:09:41 - INFO - [f7bceeb1-1e06-49db-becd-f87ec9ca6f0c] 30 frames saved to temp_videos/f7bceeb1-1e06-49db-becd-f87ec9ca6f0c +2025-08-22 11:09:54 - INFO - vision_config is None, using default vision config +2025-08-22 11:10:11 - INFO - Tokens per second: 8.176941134255504, Peak GPU memory MB: 9354.375 +2025-08-22 11:10:11 - INFO - [f7bceeb1-1e06-49db-becd-f87ec9ca6f0c] Inference time: 34.65 seconds, CPU usage: 36.7%, CPU core utilization: [44.8, 58.3, 26.0, 17.8] +2025-08-22 11:10:11 - INFO - [f7bceeb1-1e06-49db-becd-f87ec9ca6f0c] Cleaned up temporary frame directory: temp_videos/f7bceeb1-1e06-49db-becd-f87ec9ca6f0c +2025-08-22 11:10:11 - INFO - [bc80fbca-3475-45fb-87f2-3004d7fa9ea6] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:10:11 - INFO - [bc80fbca-3475-45fb-87f2-3004d7fa9ea6] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 11:10:12 - INFO - [bc80fbca-3475-45fb-87f2-3004d7fa9ea6] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 11:10:12 - INFO - [bc80fbca-3475-45fb-87f2-3004d7fa9ea6] 30 frames saved to temp_videos/bc80fbca-3475-45fb-87f2-3004d7fa9ea6 +2025-08-22 11:10:25 - INFO - vision_config is None, using default vision config +2025-08-22 11:10:34 - INFO - Tokens per second: 3.0094393528369956, Peak GPU memory MB: 9354.375 +2025-08-22 11:10:34 - INFO - [bc80fbca-3475-45fb-87f2-3004d7fa9ea6] Inference time: 22.81 seconds, CPU usage: 29.6%, CPU core utilization: [38.7, 14.6, 58.5, 6.4] +2025-08-22 11:10:34 - INFO - [bc80fbca-3475-45fb-87f2-3004d7fa9ea6] Cleaned up temporary frame directory: temp_videos/bc80fbca-3475-45fb-87f2-3004d7fa9ea6 +2025-08-22 11:10:34 - INFO - [77c278a9-142b-4240-91e0-72eeba53fdd8] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:10:34 - INFO - [77c278a9-142b-4240-91e0-72eeba53fdd8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 11:10:35 - INFO - [77c278a9-142b-4240-91e0-72eeba53fdd8] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 11:10:35 - INFO - [77c278a9-142b-4240-91e0-72eeba53fdd8] 30 frames saved to temp_videos/77c278a9-142b-4240-91e0-72eeba53fdd8 +2025-08-22 11:10:48 - INFO - vision_config is None, using default vision config +2025-08-22 11:10:56 - INFO - Tokens per second: 1.520139023080047, Peak GPU memory MB: 9354.375 +2025-08-22 11:10:56 - INFO - [77c278a9-142b-4240-91e0-72eeba53fdd8] Inference time: 21.72 seconds, CPU usage: 30.3%, CPU core utilization: [20.7, 12.5, 32.2, 55.6] +2025-08-22 11:10:56 - INFO - [77c278a9-142b-4240-91e0-72eeba53fdd8] Cleaned up temporary frame directory: temp_videos/77c278a9-142b-4240-91e0-72eeba53fdd8 diff --git a/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_111102.log b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_111102.log new file mode 100644 index 0000000000000000000000000000000000000000..7da15e0d1d8f767301251b8f0900cb8c96694fba --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_111102.log @@ -0,0 +1,44 @@ +2025-08-22 11:11:02 - INFO - Loading model: openbmb/MiniCPM-V-4-int4 +2025-08-22 11:11:03 - INFO - vision_config is None, using default vision config +2025-08-22 11:11:10 - INFO - Model loaded in 7.36 seconds +2025-08-22 11:11:10 - INFO - GPU Memory Usage after model load: 2689.45 MB +2025-08-22 11:11:10 - INFO - [897f9022-0d69-4ab8-b312-e70bff671e01] Processing video: 'videos/sample2.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:11:10 - INFO - [897f9022-0d69-4ab8-b312-e70bff671e01] Extracting frames using method: uniform, rate/threshold: 40 +2025-08-22 11:11:16 - INFO - [897f9022-0d69-4ab8-b312-e70bff671e01] Extracted 40 frames successfully. Saving to temporary files... 
+2025-08-22 11:11:16 - INFO - [897f9022-0d69-4ab8-b312-e70bff671e01] 40 frames saved to temp_videos/897f9022-0d69-4ab8-b312-e70bff671e01 +2025-08-22 11:11:37 - INFO - vision_config is None, using default vision config +2025-08-22 11:11:50 - INFO - Tokens per second: 2.5171303106196135, Peak GPU memory MB: 11526.375 +2025-08-22 11:11:50 - INFO - [897f9022-0d69-4ab8-b312-e70bff671e01] Inference time: 40.75 seconds, CPU usage: 77.6%, CPU core utilization: [71.4, 79.1, 84.1, 75.8] +2025-08-22 11:11:50 - INFO - [897f9022-0d69-4ab8-b312-e70bff671e01] Cleaned up temporary frame directory: temp_videos/897f9022-0d69-4ab8-b312-e70bff671e01 +2025-08-22 11:11:50 - INFO - [d2ac705f-4d82-47f2-a0d0-c0fdb814baca] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:11:50 - INFO - [d2ac705f-4d82-47f2-a0d0-c0fdb814baca] Extracting frames using method: uniform, rate/threshold: 40 +2025-08-22 11:11:52 - INFO - [d2ac705f-4d82-47f2-a0d0-c0fdb814baca] Extracted 40 frames successfully. Saving to temporary files... 
+2025-08-22 11:11:52 - INFO - [d2ac705f-4d82-47f2-a0d0-c0fdb814baca] 40 frames saved to temp_videos/d2ac705f-4d82-47f2-a0d0-c0fdb814baca +2025-08-22 11:12:10 - INFO - vision_config is None, using default vision config +2025-08-22 11:12:23 - INFO - Tokens per second: 3.2227324826478276, Peak GPU memory MB: 11526.375 +2025-08-22 11:12:23 - INFO - [d2ac705f-4d82-47f2-a0d0-c0fdb814baca] Inference time: 32.98 seconds, CPU usage: 37.1%, CPU core utilization: [27.5, 27.7, 40.9, 52.2] +2025-08-22 11:12:23 - INFO - [d2ac705f-4d82-47f2-a0d0-c0fdb814baca] Cleaned up temporary frame directory: temp_videos/d2ac705f-4d82-47f2-a0d0-c0fdb814baca +2025-08-22 11:12:23 - INFO - [af12b757-3a79-42ce-930b-8a500462135b] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:12:23 - INFO - [af12b757-3a79-42ce-930b-8a500462135b] Extracting frames using method: uniform, rate/threshold: 40 +2025-08-22 11:12:30 - INFO - [af12b757-3a79-42ce-930b-8a500462135b] Extracted 40 frames successfully. Saving to temporary files... 
+2025-08-22 11:12:30 - INFO - [af12b757-3a79-42ce-930b-8a500462135b] 40 frames saved to temp_videos/af12b757-3a79-42ce-930b-8a500462135b +2025-08-22 11:12:47 - INFO - vision_config is None, using default vision config +2025-08-22 11:13:02 - INFO - Tokens per second: 4.038851709599647, Peak GPU memory MB: 11526.375 +2025-08-22 11:13:02 - INFO - [af12b757-3a79-42ce-930b-8a500462135b] Inference time: 38.78 seconds, CPU usage: 37.8%, CPU core utilization: [22.1, 64.5, 18.3, 46.1] +2025-08-22 11:13:02 - INFO - [af12b757-3a79-42ce-930b-8a500462135b] Cleaned up temporary frame directory: temp_videos/af12b757-3a79-42ce-930b-8a500462135b +2025-08-22 11:13:02 - INFO - [9491609f-1203-42a7-ba7d-d1b6dddc9afc] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:13:02 - INFO - [9491609f-1203-42a7-ba7d-d1b6dddc9afc] Extracting frames using method: uniform, rate/threshold: 40 +2025-08-22 11:13:03 - INFO - [9491609f-1203-42a7-ba7d-d1b6dddc9afc] Extracted 40 frames successfully. Saving to temporary files... 
+2025-08-22 11:13:03 - INFO - [9491609f-1203-42a7-ba7d-d1b6dddc9afc] 40 frames saved to temp_videos/9491609f-1203-42a7-ba7d-d1b6dddc9afc +2025-08-22 11:13:21 - INFO - vision_config is None, using default vision config +2025-08-22 11:13:35 - INFO - Tokens per second: 2.9283473152290984, Peak GPU memory MB: 11526.375 +2025-08-22 11:13:35 - INFO - [9491609f-1203-42a7-ba7d-d1b6dddc9afc] Inference time: 32.50 seconds, CPU usage: 51.3%, CPU core utilization: [48.0, 38.8, 78.1, 40.1] +2025-08-22 11:13:35 - INFO - [9491609f-1203-42a7-ba7d-d1b6dddc9afc] Cleaned up temporary frame directory: temp_videos/9491609f-1203-42a7-ba7d-d1b6dddc9afc +2025-08-22 11:13:35 - INFO - [2774fd16-e3bd-4beb-bfd1-c13ed84ba843] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:13:35 - INFO - [2774fd16-e3bd-4beb-bfd1-c13ed84ba843] Extracting frames using method: uniform, rate/threshold: 40 +2025-08-22 11:13:36 - INFO - [2774fd16-e3bd-4beb-bfd1-c13ed84ba843] Extracted 40 frames successfully. Saving to temporary files... 
+2025-08-22 11:13:36 - INFO - [2774fd16-e3bd-4beb-bfd1-c13ed84ba843] 40 frames saved to temp_videos/2774fd16-e3bd-4beb-bfd1-c13ed84ba843 +2025-08-22 11:13:54 - INFO - vision_config is None, using default vision config +2025-08-22 11:14:10 - INFO - Tokens per second: 4.766323941506251, Peak GPU memory MB: 11526.375 +2025-08-22 11:14:10 - INFO - [2774fd16-e3bd-4beb-bfd1-c13ed84ba843] Inference time: 35.72 seconds, CPU usage: 43.7%, CPU core utilization: [39.6, 40.6, 64.6, 29.9] +2025-08-22 11:14:10 - INFO - [2774fd16-e3bd-4beb-bfd1-c13ed84ba843] Cleaned up temporary frame directory: temp_videos/2774fd16-e3bd-4beb-bfd1-c13ed84ba843 diff --git a/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_111417.log b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_111417.log new file mode 100644 index 0000000000000000000000000000000000000000..49c8fd357124a53a987ca99a8425cfc426e9f83e --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_111417.log @@ -0,0 +1,44 @@ +2025-08-22 11:14:17 - INFO - Loading model: openbmb/MiniCPM-V-4-int4 +2025-08-22 11:14:17 - INFO - vision_config is None, using default vision config +2025-08-22 11:14:24 - INFO - Model loaded in 6.82 seconds +2025-08-22 11:14:24 - INFO - GPU Memory Usage after model load: 2689.45 MB +2025-08-22 11:14:24 - INFO - [77df7ba2-e74c-403e-adf9-6849dcd7550d] Processing video: 'videos/sample2.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:14:24 - INFO - [77df7ba2-e74c-403e-adf9-6849dcd7550d] Extracting frames using method: uniform, rate/threshold: 50 +2025-08-22 11:14:32 - INFO - [77df7ba2-e74c-403e-adf9-6849dcd7550d] Extracted 50 frames successfully. Saving to temporary files... 
+2025-08-22 11:14:32 - INFO - [77df7ba2-e74c-403e-adf9-6849dcd7550d] 50 frames saved to temp_videos/77df7ba2-e74c-403e-adf9-6849dcd7550d +2025-08-22 11:14:58 - INFO - vision_config is None, using default vision config +2025-08-22 11:15:18 - INFO - Tokens per second: 3.636909184972061, Peak GPU memory MB: 12718.375 +2025-08-22 11:15:18 - INFO - [77df7ba2-e74c-403e-adf9-6849dcd7550d] Inference time: 54.29 seconds, CPU usage: 52.7%, CPU core utilization: [46.7, 54.8, 68.7, 40.6] +2025-08-22 11:15:18 - INFO - [77df7ba2-e74c-403e-adf9-6849dcd7550d] Cleaned up temporary frame directory: temp_videos/77df7ba2-e74c-403e-adf9-6849dcd7550d +2025-08-22 11:15:18 - INFO - [825ed41a-2804-41d9-882c-3d8381ec27ce] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:15:18 - INFO - [825ed41a-2804-41d9-882c-3d8381ec27ce] Extracting frames using method: uniform, rate/threshold: 50 +2025-08-22 11:15:19 - INFO - [825ed41a-2804-41d9-882c-3d8381ec27ce] Extracted 50 frames successfully. Saving to temporary files... 
+2025-08-22 11:15:20 - INFO - [825ed41a-2804-41d9-882c-3d8381ec27ce] 50 frames saved to temp_videos/825ed41a-2804-41d9-882c-3d8381ec27ce +2025-08-22 11:15:41 - INFO - vision_config is None, using default vision config +2025-08-22 11:15:59 - INFO - Tokens per second: 2.947881774503077, Peak GPU memory MB: 12718.375 +2025-08-22 11:15:59 - INFO - [825ed41a-2804-41d9-882c-3d8381ec27ce] Inference time: 41.44 seconds, CPU usage: 30.3%, CPU core utilization: [68.7, 22.6, 21.7, 8.2] +2025-08-22 11:15:59 - INFO - [825ed41a-2804-41d9-882c-3d8381ec27ce] Cleaned up temporary frame directory: temp_videos/825ed41a-2804-41d9-882c-3d8381ec27ce +2025-08-22 11:15:59 - INFO - [457f524d-183d-498a-bddb-3bf921791a74] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:15:59 - INFO - [457f524d-183d-498a-bddb-3bf921791a74] Extracting frames using method: uniform, rate/threshold: 50 +2025-08-22 11:16:08 - INFO - [457f524d-183d-498a-bddb-3bf921791a74] Extracted 50 frames successfully. Saving to temporary files... 
+2025-08-22 11:16:08 - INFO - [457f524d-183d-498a-bddb-3bf921791a74] 50 frames saved to temp_videos/457f524d-183d-498a-bddb-3bf921791a74 +2025-08-22 11:16:29 - INFO - vision_config is None, using default vision config +2025-08-22 11:16:56 - INFO - Tokens per second: 5.257369693058746, Peak GPU memory MB: 12718.375 +2025-08-22 11:16:56 - INFO - [457f524d-183d-498a-bddb-3bf921791a74] Inference time: 56.38 seconds, CPU usage: 36.4%, CPU core utilization: [60.8, 27.2, 40.1, 17.2] +2025-08-22 11:16:56 - INFO - [457f524d-183d-498a-bddb-3bf921791a74] Cleaned up temporary frame directory: temp_videos/457f524d-183d-498a-bddb-3bf921791a74 +2025-08-22 11:16:56 - INFO - [110b198a-4fb3-4168-a92c-4a1afe6ed016] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:16:56 - INFO - [110b198a-4fb3-4168-a92c-4a1afe6ed016] Extracting frames using method: uniform, rate/threshold: 50 +2025-08-22 11:16:57 - INFO - [110b198a-4fb3-4168-a92c-4a1afe6ed016] Extracted 50 frames successfully. Saving to temporary files... 
+2025-08-22 11:16:57 - INFO - [110b198a-4fb3-4168-a92c-4a1afe6ed016] 50 frames saved to temp_videos/110b198a-4fb3-4168-a92c-4a1afe6ed016 +2025-08-22 11:17:19 - INFO - vision_config is None, using default vision config +2025-08-22 11:17:48 - INFO - Tokens per second: 5.539234909838161, Peak GPU memory MB: 12718.375 +2025-08-22 11:17:48 - INFO - [110b198a-4fb3-4168-a92c-4a1afe6ed016] Inference time: 52.03 seconds, CPU usage: 29.2%, CPU core utilization: [41.4, 8.4, 58.5, 8.4] +2025-08-22 11:17:48 - INFO - [110b198a-4fb3-4168-a92c-4a1afe6ed016] Cleaned up temporary frame directory: temp_videos/110b198a-4fb3-4168-a92c-4a1afe6ed016 +2025-08-22 11:17:48 - INFO - [7b55f145-f8fa-4977-bedd-c078760b3c56] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:17:48 - INFO - [7b55f145-f8fa-4977-bedd-c078760b3c56] Extracting frames using method: uniform, rate/threshold: 50 +2025-08-22 11:17:49 - INFO - [7b55f145-f8fa-4977-bedd-c078760b3c56] Extracted 50 frames successfully. Saving to temporary files... 
+2025-08-22 11:17:49 - INFO - [7b55f145-f8fa-4977-bedd-c078760b3c56] 50 frames saved to temp_videos/7b55f145-f8fa-4977-bedd-c078760b3c56 +2025-08-22 11:18:11 - INFO - vision_config is None, using default vision config +2025-08-22 11:18:36 - INFO - Tokens per second: 4.7551999525578434, Peak GPU memory MB: 12718.375 +2025-08-22 11:18:36 - INFO - [7b55f145-f8fa-4977-bedd-c078760b3c56] Inference time: 47.79 seconds, CPU usage: 28.7%, CPU core utilization: [6.5, 9.0, 54.5, 44.7] +2025-08-22 11:18:36 - INFO - [7b55f145-f8fa-4977-bedd-c078760b3c56] Cleaned up temporary frame directory: temp_videos/7b55f145-f8fa-4977-bedd-c078760b3c56 diff --git a/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_111842.log b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_111842.log new file mode 100644 index 0000000000000000000000000000000000000000..2e67cc9fe8cb15a9920d9ec9c5a6c9cd31332372 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_111842.log @@ -0,0 +1,44 @@ +2025-08-22 11:18:42 - INFO - Loading model: openbmb/MiniCPM-V-4-int4 +2025-08-22 11:18:43 - INFO - vision_config is None, using default vision config +2025-08-22 11:18:49 - INFO - Model loaded in 7.35 seconds +2025-08-22 11:18:49 - INFO - GPU Memory Usage after model load: 2689.45 MB +2025-08-22 11:18:49 - INFO - [7775eecd-8936-4c41-86d5-7f025a398a9b] Processing video: 'videos/sample2.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:18:49 - INFO - [7775eecd-8936-4c41-86d5-7f025a398a9b] Extracting frames using method: uniform, rate/threshold: 60 +2025-08-22 11:18:59 - INFO - [7775eecd-8936-4c41-86d5-7f025a398a9b] Extracted 60 frames successfully. Saving to temporary files... 
+2025-08-22 11:19:00 - INFO - [7775eecd-8936-4c41-86d5-7f025a398a9b] 60 frames saved to temp_videos/7775eecd-8936-4c41-86d5-7f025a398a9b +2025-08-22 11:19:29 - INFO - vision_config is None, using default vision config +2025-08-22 11:19:54 - INFO - Tokens per second: 2.62416869979478, Peak GPU memory MB: 14174.375 +2025-08-22 11:19:54 - INFO - [7775eecd-8936-4c41-86d5-7f025a398a9b] Inference time: 64.17 seconds, CPU usage: 67.6%, CPU core utilization: [67.3, 63.0, 71.0, 68.9] +2025-08-22 11:19:54 - INFO - [7775eecd-8936-4c41-86d5-7f025a398a9b] Cleaned up temporary frame directory: temp_videos/7775eecd-8936-4c41-86d5-7f025a398a9b +2025-08-22 11:19:54 - INFO - [90697562-2d13-486b-a271-d402cbd73e1c] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:19:54 - INFO - [90697562-2d13-486b-a271-d402cbd73e1c] Extracting frames using method: uniform, rate/threshold: 60 +2025-08-22 11:19:55 - INFO - [90697562-2d13-486b-a271-d402cbd73e1c] Extracted 60 frames successfully. Saving to temporary files... 
+2025-08-22 11:19:55 - INFO - [90697562-2d13-486b-a271-d402cbd73e1c] 60 frames saved to temp_videos/90697562-2d13-486b-a271-d402cbd73e1c +2025-08-22 11:20:21 - INFO - vision_config is None, using default vision config +2025-08-22 11:20:48 - INFO - Tokens per second: 3.1184040645544244, Peak GPU memory MB: 14174.375 +2025-08-22 11:20:48 - INFO - [90697562-2d13-486b-a271-d402cbd73e1c] Inference time: 54.37 seconds, CPU usage: 29.1%, CPU core utilization: [28.7, 10.4, 71.4, 5.8] +2025-08-22 11:20:48 - INFO - [90697562-2d13-486b-a271-d402cbd73e1c] Cleaned up temporary frame directory: temp_videos/90697562-2d13-486b-a271-d402cbd73e1c +2025-08-22 11:20:48 - INFO - [80d4975d-d162-40b0-8ef9-c0a8ca1a243d] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:20:48 - INFO - [80d4975d-d162-40b0-8ef9-c0a8ca1a243d] Extracting frames using method: uniform, rate/threshold: 60 +2025-08-22 11:20:58 - INFO - [80d4975d-d162-40b0-8ef9-c0a8ca1a243d] Extracted 60 frames successfully. Saving to temporary files... 
+2025-08-22 11:20:58 - INFO - [80d4975d-d162-40b0-8ef9-c0a8ca1a243d] 60 frames saved to temp_videos/80d4975d-d162-40b0-8ef9-c0a8ca1a243d +2025-08-22 11:21:23 - INFO - vision_config is None, using default vision config +2025-08-22 11:21:53 - INFO - Tokens per second: 3.6808149496535814, Peak GPU memory MB: 14174.375 +2025-08-22 11:21:53 - INFO - [80d4975d-d162-40b0-8ef9-c0a8ca1a243d] Inference time: 65.01 seconds, CPU usage: 36.5%, CPU core utilization: [32.9, 55.4, 27.5, 30.3] +2025-08-22 11:21:53 - INFO - [80d4975d-d162-40b0-8ef9-c0a8ca1a243d] Cleaned up temporary frame directory: temp_videos/80d4975d-d162-40b0-8ef9-c0a8ca1a243d +2025-08-22 11:21:53 - INFO - [d67151ce-541f-48c0-9c81-232f5b638a15] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:21:53 - INFO - [d67151ce-541f-48c0-9c81-232f5b638a15] Extracting frames using method: uniform, rate/threshold: 60 +2025-08-22 11:21:55 - INFO - [d67151ce-541f-48c0-9c81-232f5b638a15] Extracted 60 frames successfully. Saving to temporary files... 
+2025-08-22 11:21:55 - INFO - [d67151ce-541f-48c0-9c81-232f5b638a15] 60 frames saved to temp_videos/d67151ce-541f-48c0-9c81-232f5b638a15 +2025-08-22 11:22:21 - INFO - vision_config is None, using default vision config +2025-08-22 11:22:46 - INFO - Tokens per second: 2.722827822424792, Peak GPU memory MB: 14174.375 +2025-08-22 11:22:46 - INFO - [d67151ce-541f-48c0-9c81-232f5b638a15] Inference time: 52.91 seconds, CPU usage: 29.6%, CPU core utilization: [23.8, 25.4, 61.5, 7.8] +2025-08-22 11:22:46 - INFO - [d67151ce-541f-48c0-9c81-232f5b638a15] Cleaned up temporary frame directory: temp_videos/d67151ce-541f-48c0-9c81-232f5b638a15 +2025-08-22 11:22:46 - INFO - [81ffc9d3-fb6c-4ad8-94f4-ac4eb142187d] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 11:22:46 - INFO - [81ffc9d3-fb6c-4ad8-94f4-ac4eb142187d] Extracting frames using method: uniform, rate/threshold: 60 +2025-08-22 11:22:48 - INFO - [81ffc9d3-fb6c-4ad8-94f4-ac4eb142187d] Extracted 60 frames successfully. Saving to temporary files... 
+2025-08-22 11:22:48 - INFO - [81ffc9d3-fb6c-4ad8-94f4-ac4eb142187d] 60 frames saved to temp_videos/81ffc9d3-fb6c-4ad8-94f4-ac4eb142187d +2025-08-22 11:23:14 - INFO - vision_config is None, using default vision config +2025-08-22 11:23:39 - INFO - Tokens per second: 2.721625615443428, Peak GPU memory MB: 14174.375 +2025-08-22 11:23:39 - INFO - [81ffc9d3-fb6c-4ad8-94f4-ac4eb142187d] Inference time: 52.79 seconds, CPU usage: 29.5%, CPU core utilization: [31.5, 7.9, 72.7, 5.5] +2025-08-22 11:23:39 - INFO - [81ffc9d3-fb6c-4ad8-94f4-ac4eb142187d] Cleaned up temporary frame directory: temp_videos/81ffc9d3-fb6c-4ad8-94f4-ac4eb142187d diff --git a/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_130116.log b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_130116.log new file mode 100644 index 0000000000000000000000000000000000000000..216bf6e6bb8b4397a91199cc5ccccec324c1074c --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4-int4/20250822_130116.log @@ -0,0 +1,10 @@ +2025-08-22 13:01:16 - INFO - Loading model: openbmb/MiniCPM-V-4-int4 +2025-08-22 13:01:17 - INFO - vision_config is None, using default vision config +2025-08-22 13:01:22 - INFO - Model loaded in 6.20 seconds +2025-08-22 13:01:22 - INFO - GPU Memory Usage after model load: 2689.45 MB +2025-08-22 13:01:22 - INFO - [85ba68db-be91-4cc5-9fef-9aec73dc02d0] Processing video: 'videos/sample2.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 13:01:22 - INFO - [85ba68db-be91-4cc5-9fef-9aec73dc02d0] Extracting frames using method: uniform, rate/threshold: 5 +2025-08-22 13:01:23 - INFO - [85ba68db-be91-4cc5-9fef-9aec73dc02d0] Extracted 5 frames successfully. Saving to temporary files... 
+2025-08-22 13:01:23 - INFO - [85ba68db-be91-4cc5-9fef-9aec73dc02d0] 5 frames saved to temp_videos/85ba68db-be91-4cc5-9fef-9aec73dc02d0 +2025-08-22 13:01:29 - INFO - vision_config is None, using default vision config +2025-08-22 13:01:33 - INFO - [85ba68db-be91-4cc5-9fef-9aec73dc02d0] Cleaned up temporary frame directory: temp_videos/85ba68db-be91-4cc5-9fef-9aec73dc02d0 diff --git a/Direct_Transformers/logs/MiniCPM-V-4/20250822_040510.log b/Direct_Transformers/logs/MiniCPM-V-4/20250822_040510.log new file mode 100644 index 0000000000000000000000000000000000000000..0f78546859163361dac43b550da31cb1224d7a11 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4/20250822_040510.log @@ -0,0 +1,20 @@ +2025-08-22 04:05:10 - INFO - Loading model: openbmb/MiniCPM-V-4 +2025-08-22 04:05:11 - INFO - vision_config is None, using default vision config +2025-08-22 04:06:14 - INFO - Model loaded in 64.06 seconds +2025-08-22 04:06:14 - INFO - GPU Memory Usage after model load: 7802.99 MB +2025-08-22 04:06:14 - INFO - [83e6ecab-4aa8-4145-a723-df97c6a534ff] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 04:06:14 - INFO - [83e6ecab-4aa8-4145-a723-df97c6a534ff] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 04:06:19 - INFO - [83e6ecab-4aa8-4145-a723-df97c6a534ff] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 04:06:20 - INFO - [83e6ecab-4aa8-4145-a723-df97c6a534ff] 30 frames saved to temp_videos/83e6ecab-4aa8-4145-a723-df97c6a534ff +2025-08-22 04:06:36 - INFO - vision_config is None, using default vision config +2025-08-22 04:06:52 - INFO - Tokens per second: 7.668726473028378, Peak GPU memory MB: 13140.375 +2025-08-22 04:06:52 - INFO - [83e6ecab-4aa8-4145-a723-df97c6a534ff] Inference time: 37.56 seconds, CPU usage: 19.3%, CPU core utilization: [22.7, 15.6, 20.2, 18.5] +2025-08-22 04:06:52 - INFO - [83e6ecab-4aa8-4145-a723-df97c6a534ff] Cleaned up temporary frame directory: temp_videos/83e6ecab-4aa8-4145-a723-df97c6a534ff +2025-08-22 04:06:52 - INFO - [1cfb935b-5c0c-4f97-a288-7f0436b30c26] Processing video: 'videos/sample1_rotated.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 04:06:52 - INFO - [1cfb935b-5c0c-4f97-a288-7f0436b30c26] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 04:06:53 - INFO - [1cfb935b-5c0c-4f97-a288-7f0436b30c26] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 04:06:53 - INFO - [1cfb935b-5c0c-4f97-a288-7f0436b30c26] 30 frames saved to temp_videos/1cfb935b-5c0c-4f97-a288-7f0436b30c26 +2025-08-22 04:07:06 - INFO - vision_config is None, using default vision config +2025-08-22 04:07:16 - INFO - Tokens per second: 4.763776873285316, Peak GPU memory MB: 13140.375 +2025-08-22 04:07:16 - INFO - [1cfb935b-5c0c-4f97-a288-7f0436b30c26] Inference time: 23.99 seconds, CPU usage: 29.7%, CPU core utilization: [27.6, 33.2, 34.1, 24.0] +2025-08-22 04:07:16 - INFO - [1cfb935b-5c0c-4f97-a288-7f0436b30c26] Cleaned up temporary frame directory: temp_videos/1cfb935b-5c0c-4f97-a288-7f0436b30c26 diff --git a/Direct_Transformers/logs/MiniCPM-V-4/20250822_044424.log b/Direct_Transformers/logs/MiniCPM-V-4/20250822_044424.log new file mode 100644 index 0000000000000000000000000000000000000000..d2e4771e7b575fc0d0261b88d86108564f428de1 --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4/20250822_044424.log @@ -0,0 +1,11 @@ +2025-08-22 04:44:24 - INFO - Loading model: openbmb/MiniCPM-V-4 +2025-08-22 04:44:25 - INFO - vision_config is None, using default vision config +2025-08-22 04:44:26 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-22 04:45:11 - INFO - Model loaded in 47.20 seconds +2025-08-22 04:45:11 - INFO - GPU Memory Usage after model load: 7802.81 MB +2025-08-22 04:45:11 - INFO - [86c69131-ce9d-4bac-983b-476d9c5c79b7] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' 
+2025-08-22 04:45:11 - INFO - [86c69131-ce9d-4bac-983b-476d9c5c79b7] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 04:45:12 - INFO - [86c69131-ce9d-4bac-983b-476d9c5c79b7] Extracted 30 frames successfully. Saving to temporary files... +2025-08-22 04:45:12 - INFO - [86c69131-ce9d-4bac-983b-476d9c5c79b7] 30 frames saved to temp_videos/86c69131-ce9d-4bac-983b-476d9c5c79b7 +2025-08-22 04:45:21 - INFO - vision_config is None, using default vision config +2025-08-22 04:45:50 - INFO - [86c69131-ce9d-4bac-983b-476d9c5c79b7] Cleaned up temporary frame directory: temp_videos/86c69131-ce9d-4bac-983b-476d9c5c79b7 diff --git a/Direct_Transformers/logs/MiniCPM-V-4/20250822_044625.log b/Direct_Transformers/logs/MiniCPM-V-4/20250822_044625.log new file mode 100644 index 0000000000000000000000000000000000000000..105994cacddca85e1dc8615aeacb428a8a5d36de --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4/20250822_044625.log @@ -0,0 +1,36 @@ +2025-08-22 04:46:25 - INFO - Loading model: openbmb/MiniCPM-V-4 +2025-08-22 04:46:25 - INFO - vision_config is None, using default vision config +2025-08-22 04:47:05 - INFO - Model loaded in 40.14 seconds +2025-08-22 04:47:05 - INFO - GPU Memory Usage after model load: 7802.99 MB +2025-08-22 04:47:05 - INFO - [0dc2ca5b-e666-4795-b0e6-52bbb7232e2f] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 04:47:05 - INFO - [0dc2ca5b-e666-4795-b0e6-52bbb7232e2f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 04:47:06 - INFO - [0dc2ca5b-e666-4795-b0e6-52bbb7232e2f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 04:47:06 - INFO - [0dc2ca5b-e666-4795-b0e6-52bbb7232e2f] 30 frames saved to temp_videos/0dc2ca5b-e666-4795-b0e6-52bbb7232e2f +2025-08-22 04:47:24 - INFO - vision_config is None, using default vision config +2025-08-22 04:47:37 - INFO - Tokens per second: 6.61058625835899, Peak GPU memory MB: 13140.375 +2025-08-22 04:47:37 - INFO - [0dc2ca5b-e666-4795-b0e6-52bbb7232e2f] Inference time: 31.98 seconds, CPU usage: 24.9%, CPU core utilization: [25.0, 30.3, 24.8, 19.4] +2025-08-22 04:47:37 - INFO - [0dc2ca5b-e666-4795-b0e6-52bbb7232e2f] Cleaned up temporary frame directory: temp_videos/0dc2ca5b-e666-4795-b0e6-52bbb7232e2f +2025-08-22 04:47:37 - INFO - [e76115d1-7380-497c-8cfc-58697760dadb] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 04:47:37 - INFO - [e76115d1-7380-497c-8cfc-58697760dadb] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 04:47:42 - INFO - [e76115d1-7380-497c-8cfc-58697760dadb] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 04:47:42 - INFO - [e76115d1-7380-497c-8cfc-58697760dadb] 30 frames saved to temp_videos/e76115d1-7380-497c-8cfc-58697760dadb +2025-08-22 04:47:55 - INFO - vision_config is None, using default vision config +2025-08-22 04:48:12 - INFO - Tokens per second: 8.564189167013154, Peak GPU memory MB: 13140.375 +2025-08-22 04:48:12 - INFO - [e76115d1-7380-497c-8cfc-58697760dadb] Inference time: 35.38 seconds, CPU usage: 46.0%, CPU core utilization: [29.3, 30.8, 36.2, 87.5] +2025-08-22 04:48:12 - INFO - [e76115d1-7380-497c-8cfc-58697760dadb] Cleaned up temporary frame directory: temp_videos/e76115d1-7380-497c-8cfc-58697760dadb +2025-08-22 04:48:12 - INFO - [03891848-b72e-411b-a6c0-12d7a1feeec6] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 04:48:12 - INFO - [03891848-b72e-411b-a6c0-12d7a1feeec6] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 04:48:14 - INFO - [03891848-b72e-411b-a6c0-12d7a1feeec6] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 04:48:14 - INFO - [03891848-b72e-411b-a6c0-12d7a1feeec6] 30 frames saved to temp_videos/03891848-b72e-411b-a6c0-12d7a1feeec6 +2025-08-22 04:48:26 - INFO - vision_config is None, using default vision config +2025-08-22 04:48:40 - INFO - Tokens per second: 6.871595189671803, Peak GPU memory MB: 13140.375 +2025-08-22 04:48:40 - INFO - [03891848-b72e-411b-a6c0-12d7a1feeec6] Inference time: 27.67 seconds, CPU usage: 49.1%, CPU core utilization: [36.7, 43.2, 46.6, 69.9] +2025-08-22 04:48:40 - INFO - [03891848-b72e-411b-a6c0-12d7a1feeec6] Cleaned up temporary frame directory: temp_videos/03891848-b72e-411b-a6c0-12d7a1feeec6 +2025-08-22 04:48:40 - INFO - [ee171f56-31f3-44aa-ab53-8278de0cf252] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 04:48:40 - INFO - [ee171f56-31f3-44aa-ab53-8278de0cf252] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 04:48:41 - INFO - [ee171f56-31f3-44aa-ab53-8278de0cf252] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 04:48:41 - INFO - [ee171f56-31f3-44aa-ab53-8278de0cf252] 30 frames saved to temp_videos/ee171f56-31f3-44aa-ab53-8278de0cf252 +2025-08-22 04:48:54 - INFO - vision_config is None, using default vision config +2025-08-22 04:49:09 - INFO - Tokens per second: 7.413903967514163, Peak GPU memory MB: 13140.375 +2025-08-22 04:49:09 - INFO - [ee171f56-31f3-44aa-ab53-8278de0cf252] Inference time: 29.01 seconds, CPU usage: 53.5%, CPU core utilization: [52.8, 38.9, 74.9, 47.2] +2025-08-22 04:49:09 - INFO - [ee171f56-31f3-44aa-ab53-8278de0cf252] Cleaned up temporary frame directory: temp_videos/ee171f56-31f3-44aa-ab53-8278de0cf252 diff --git a/Direct_Transformers/logs/MiniCPM-V-4/20250822_053357.log b/Direct_Transformers/logs/MiniCPM-V-4/20250822_053357.log new file mode 100644 index 0000000000000000000000000000000000000000..1b633cdf73fcefeb89c216bf1920fa27df78005d --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4/20250822_053357.log @@ -0,0 +1,10 @@ +2025-08-22 05:33:57 - INFO - Loading model: openbmb/MiniCPM-V-4 +2025-08-22 05:33:58 - INFO - vision_config is None, using default vision config +2025-08-22 05:35:13 - INFO - Model loaded in 75.34 seconds +2025-08-22 05:35:13 - INFO - GPU Memory Usage after model load: 7802.99 MB +2025-08-22 05:35:13 - INFO - [6615b980-e213-46d9-a038-7bbde52dad22] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:35:13 - INFO - [6615b980-e213-46d9-a038-7bbde52dad22] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:35:14 - INFO - [6615b980-e213-46d9-a038-7bbde52dad22] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:35:14 - INFO - [6615b980-e213-46d9-a038-7bbde52dad22] 30 frames saved to temp_videos/6615b980-e213-46d9-a038-7bbde52dad22 +2025-08-22 05:35:23 - INFO - vision_config is None, using default vision config +2025-08-22 05:40:28 - INFO - [6615b980-e213-46d9-a038-7bbde52dad22] Cleaned up temporary frame directory: temp_videos/6615b980-e213-46d9-a038-7bbde52dad22 diff --git a/Direct_Transformers/logs/MiniCPM-V-4/20250822_054230.log b/Direct_Transformers/logs/MiniCPM-V-4/20250822_054230.log new file mode 100644 index 0000000000000000000000000000000000000000..cb69506d9bd4dc5e181ccafe24cd5071444d1a9c --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4/20250822_054230.log @@ -0,0 +1,36 @@ +2025-08-22 05:42:30 - INFO - Loading model: openbmb/MiniCPM-V-4 +2025-08-22 05:42:30 - INFO - vision_config is None, using default vision config +2025-08-22 05:43:28 - INFO - Model loaded in 58.39 seconds +2025-08-22 05:43:28 - INFO - GPU Memory Usage after model load: 7802.99 MB +2025-08-22 05:43:28 - INFO - [dcfe8de7-58f0-4162-8d7b-a513c00e893c] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:43:28 - INFO - [dcfe8de7-58f0-4162-8d7b-a513c00e893c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:43:29 - INFO - [dcfe8de7-58f0-4162-8d7b-a513c00e893c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:43:29 - INFO - [dcfe8de7-58f0-4162-8d7b-a513c00e893c] 30 frames saved to temp_videos/dcfe8de7-58f0-4162-8d7b-a513c00e893c +2025-08-22 05:43:46 - INFO - vision_config is None, using default vision config +2025-08-22 05:44:00 - INFO - Tokens per second: 6.97994619995469, Peak GPU memory MB: 13140.375 +2025-08-22 05:44:00 - INFO - [dcfe8de7-58f0-4162-8d7b-a513c00e893c] Inference time: 31.93 seconds, CPU usage: 14.5%, CPU core utilization: [19.4, 9.9, 20.4, 8.5] +2025-08-22 05:44:00 - INFO - [dcfe8de7-58f0-4162-8d7b-a513c00e893c] Cleaned up temporary frame directory: temp_videos/dcfe8de7-58f0-4162-8d7b-a513c00e893c +2025-08-22 05:44:00 - INFO - [9688bb31-4a5b-4174-a5d3-b655a94be714] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:44:00 - INFO - [9688bb31-4a5b-4174-a5d3-b655a94be714] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:44:06 - INFO - [9688bb31-4a5b-4174-a5d3-b655a94be714] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:44:06 - INFO - [9688bb31-4a5b-4174-a5d3-b655a94be714] 30 frames saved to temp_videos/9688bb31-4a5b-4174-a5d3-b655a94be714 +2025-08-22 05:44:18 - INFO - vision_config is None, using default vision config +2025-08-22 05:44:32 - INFO - Tokens per second: 7.346576015565981, Peak GPU memory MB: 13140.375 +2025-08-22 05:44:32 - INFO - [9688bb31-4a5b-4174-a5d3-b655a94be714] Inference time: 32.25 seconds, CPU usage: 38.6%, CPU core utilization: [21.7, 37.1, 52.7, 42.8] +2025-08-22 05:44:32 - INFO - [9688bb31-4a5b-4174-a5d3-b655a94be714] Cleaned up temporary frame directory: temp_videos/9688bb31-4a5b-4174-a5d3-b655a94be714 +2025-08-22 05:44:32 - INFO - [f8b48685-3207-4cb6-acd1-a6cc9c6dbf99] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:44:32 - INFO - [f8b48685-3207-4cb6-acd1-a6cc9c6dbf99] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:44:33 - INFO - [f8b48685-3207-4cb6-acd1-a6cc9c6dbf99] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:44:33 - INFO - [f8b48685-3207-4cb6-acd1-a6cc9c6dbf99] 30 frames saved to temp_videos/f8b48685-3207-4cb6-acd1-a6cc9c6dbf99 +2025-08-22 05:44:46 - INFO - vision_config is None, using default vision config +2025-08-22 05:44:59 - INFO - Tokens per second: 6.41842237886657, Peak GPU memory MB: 13140.375 +2025-08-22 05:44:59 - INFO - [f8b48685-3207-4cb6-acd1-a6cc9c6dbf99] Inference time: 26.35 seconds, CPU usage: 30.3%, CPU core utilization: [33.7, 27.4, 14.8, 45.4] +2025-08-22 05:44:59 - INFO - [f8b48685-3207-4cb6-acd1-a6cc9c6dbf99] Cleaned up temporary frame directory: temp_videos/f8b48685-3207-4cb6-acd1-a6cc9c6dbf99 +2025-08-22 05:44:59 - INFO - [efd73a2a-1666-4017-a20b-b13c69acc01c] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:44:59 - INFO - [efd73a2a-1666-4017-a20b-b13c69acc01c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:45:00 - INFO - [efd73a2a-1666-4017-a20b-b13c69acc01c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:45:00 - INFO - [efd73a2a-1666-4017-a20b-b13c69acc01c] 30 frames saved to temp_videos/efd73a2a-1666-4017-a20b-b13c69acc01c +2025-08-22 05:45:13 - INFO - vision_config is None, using default vision config +2025-08-22 05:45:22 - INFO - Tokens per second: 3.94583234151262, Peak GPU memory MB: 13140.375 +2025-08-22 05:45:22 - INFO - [efd73a2a-1666-4017-a20b-b13c69acc01c] Inference time: 23.41 seconds, CPU usage: 30.0%, CPU core utilization: [48.4, 15.8, 22.9, 32.8] +2025-08-22 05:45:22 - INFO - [efd73a2a-1666-4017-a20b-b13c69acc01c] Cleaned up temporary frame directory: temp_videos/efd73a2a-1666-4017-a20b-b13c69acc01c diff --git a/Direct_Transformers/logs/MiniCPM-V-4/20250822_055004.log b/Direct_Transformers/logs/MiniCPM-V-4/20250822_055004.log new file mode 100644 index 0000000000000000000000000000000000000000..2a8e1c59fe1f8f00eff7ac32f2e25f96fa8d893c --- /dev/null +++ b/Direct_Transformers/logs/MiniCPM-V-4/20250822_055004.log @@ -0,0 +1,44 @@ +2025-08-22 05:50:04 - INFO - Loading model: openbmb/MiniCPM-V-4 +2025-08-22 05:50:04 - INFO - vision_config is None, using default vision config +2025-08-22 05:50:10 - INFO - Model loaded in 5.81 seconds +2025-08-22 05:50:10 - INFO - GPU Memory Usage after model load: 7802.99 MB +2025-08-22 05:50:10 - INFO - [62f6a61f-a889-4e9b-8a2b-6951dc9722c3] Processing video: 'videos/sample2.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:50:10 - INFO - [62f6a61f-a889-4e9b-8a2b-6951dc9722c3] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:50:13 - INFO - [62f6a61f-a889-4e9b-8a2b-6951dc9722c3] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:50:13 - INFO - [62f6a61f-a889-4e9b-8a2b-6951dc9722c3] 30 frames saved to temp_videos/62f6a61f-a889-4e9b-8a2b-6951dc9722c3 +2025-08-22 05:50:30 - INFO - vision_config is None, using default vision config +2025-08-22 05:50:49 - INFO - Tokens per second: 9.17590678416416, Peak GPU memory MB: 13140.375 +2025-08-22 05:50:49 - INFO - [62f6a61f-a889-4e9b-8a2b-6951dc9722c3] Inference time: 39.74 seconds, CPU usage: 28.0%, CPU core utilization: [18.8, 19.5, 32.4, 41.2] +2025-08-22 05:50:49 - INFO - [62f6a61f-a889-4e9b-8a2b-6951dc9722c3] Cleaned up temporary frame directory: temp_videos/62f6a61f-a889-4e9b-8a2b-6951dc9722c3 +2025-08-22 05:50:49 - INFO - [b9fca7fa-fde3-4586-8012-bdd9dc186b9a] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:50:49 - INFO - [b9fca7fa-fde3-4586-8012-bdd9dc186b9a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:50:50 - INFO - [b9fca7fa-fde3-4586-8012-bdd9dc186b9a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:50:50 - INFO - [b9fca7fa-fde3-4586-8012-bdd9dc186b9a] 30 frames saved to temp_videos/b9fca7fa-fde3-4586-8012-bdd9dc186b9a +2025-08-22 05:51:03 - INFO - vision_config is None, using default vision config +2025-08-22 05:51:20 - INFO - Tokens per second: 8.507203392291894, Peak GPU memory MB: 13140.375 +2025-08-22 05:51:20 - INFO - [b9fca7fa-fde3-4586-8012-bdd9dc186b9a] Inference time: 30.91 seconds, CPU usage: 29.6%, CPU core utilization: [29.5, 11.7, 16.7, 60.3] +2025-08-22 05:51:20 - INFO - [b9fca7fa-fde3-4586-8012-bdd9dc186b9a] Cleaned up temporary frame directory: temp_videos/b9fca7fa-fde3-4586-8012-bdd9dc186b9a +2025-08-22 05:51:20 - INFO - [78948d9e-12b2-41b6-8686-6132206bad53] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:51:20 - INFO - [78948d9e-12b2-41b6-8686-6132206bad53] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:51:25 - INFO - [78948d9e-12b2-41b6-8686-6132206bad53] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:51:25 - INFO - [78948d9e-12b2-41b6-8686-6132206bad53] 30 frames saved to temp_videos/78948d9e-12b2-41b6-8686-6132206bad53 +2025-08-22 05:51:38 - INFO - vision_config is None, using default vision config +2025-08-22 05:51:54 - INFO - Tokens per second: 7.992290454420001, Peak GPU memory MB: 13140.375 +2025-08-22 05:51:54 - INFO - [78948d9e-12b2-41b6-8686-6132206bad53] Inference time: 33.76 seconds, CPU usage: 37.5%, CPU core utilization: [31.0, 24.7, 59.5, 34.9] +2025-08-22 05:51:54 - INFO - [78948d9e-12b2-41b6-8686-6132206bad53] Cleaned up temporary frame directory: temp_videos/78948d9e-12b2-41b6-8686-6132206bad53 +2025-08-22 05:51:54 - INFO - [46b4eb7a-5eef-471f-8310-3b63c7cc08c8] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:51:54 - INFO - [46b4eb7a-5eef-471f-8310-3b63c7cc08c8] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:51:55 - INFO - [46b4eb7a-5eef-471f-8310-3b63c7cc08c8] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:51:55 - INFO - [46b4eb7a-5eef-471f-8310-3b63c7cc08c8] 30 frames saved to temp_videos/46b4eb7a-5eef-471f-8310-3b63c7cc08c8 +2025-08-22 05:52:08 - INFO - vision_config is None, using default vision config +2025-08-22 05:52:19 - INFO - Tokens per second: 5.582476898169938, Peak GPU memory MB: 13140.375 +2025-08-22 05:52:19 - INFO - [46b4eb7a-5eef-471f-8310-3b63c7cc08c8] Inference time: 25.22 seconds, CPU usage: 29.9%, CPU core utilization: [41.2, 8.1, 62.4, 7.4] +2025-08-22 05:52:19 - INFO - [46b4eb7a-5eef-471f-8310-3b63c7cc08c8] Cleaned up temporary frame directory: temp_videos/46b4eb7a-5eef-471f-8310-3b63c7cc08c8 +2025-08-22 05:52:19 - INFO - [7c1bd69f-ec68-4699-b7ae-d253fc203819] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:52:19 - INFO - [7c1bd69f-ec68-4699-b7ae-d253fc203819] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:52:20 - INFO - [7c1bd69f-ec68-4699-b7ae-d253fc203819] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:52:20 - INFO - [7c1bd69f-ec68-4699-b7ae-d253fc203819] 30 frames saved to temp_videos/7c1bd69f-ec68-4699-b7ae-d253fc203819 +2025-08-22 05:52:33 - INFO - vision_config is None, using default vision config +2025-08-22 05:52:43 - INFO - Tokens per second: 4.288594229547833, Peak GPU memory MB: 13140.375 +2025-08-22 05:52:43 - INFO - [7c1bd69f-ec68-4699-b7ae-d253fc203819] Inference time: 23.79 seconds, CPU usage: 30.4%, CPU core utilization: [48.7, 44.8, 13.8, 14.2] +2025-08-22 05:52:43 - INFO - [7c1bd69f-ec68-4699-b7ae-d253fc203819] Cleaned up temporary frame directory: temp_videos/7c1bd69f-ec68-4699-b7ae-d253fc203819 diff --git a/Direct_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250822_055548.log b/Direct_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250822_055548.log new file mode 100644 index 0000000000000000000000000000000000000000..86ced5c87a639ca0878b8d4bb6ef2fc200242077 --- /dev/null +++ b/Direct_Transformers/logs/Qwen2-VL-2B-Instruct-AWQ/20250822_055548.log @@ -0,0 +1,44 @@ +2025-08-22 05:55:48 - INFO - Loading model: Qwen/Qwen2-VL-2B-Instruct-AWQ +2025-08-22 05:55:52 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-22 05:56:18 - INFO - Model loaded in 30.20 seconds +2025-08-22 05:56:18 - INFO - GPU Memory Usage after model load: 2369.47 MB +2025-08-22 05:56:18 - INFO - [dc39e294-469d-421f-bfa2-1bf3d91c18a4] Processing video: 'videos/sample2.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' 
+2025-08-22 05:56:18 - INFO - [dc39e294-469d-421f-bfa2-1bf3d91c18a4] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:56:22 - INFO - [dc39e294-469d-421f-bfa2-1bf3d91c18a4] Extracted 30 frames successfully. Saving to temporary files... +2025-08-22 05:56:22 - INFO - [dc39e294-469d-421f-bfa2-1bf3d91c18a4] 30 frames saved to temp_videos/dc39e294-469d-421f-bfa2-1bf3d91c18a4 +2025-08-22 05:56:22 - INFO - Prompt token length: 2330 +2025-08-22 05:56:31 - INFO - Tokens per second: 20.134168800564364, Peak GPU memory MB: 5842.375 +2025-08-22 05:56:31 - INFO - [dc39e294-469d-421f-bfa2-1bf3d91c18a4] Inference time: 12.46 seconds, CPU usage: 19.4%, CPU core utilization: [12.0, 20.5, 15.0, 30.1] +2025-08-22 05:56:31 - INFO - [dc39e294-469d-421f-bfa2-1bf3d91c18a4] Cleaned up temporary frame directory: temp_videos/dc39e294-469d-421f-bfa2-1bf3d91c18a4 +2025-08-22 05:56:31 - INFO - [8235bab7-dc62-460f-ada4-6de957321cc5] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:56:31 - INFO - [8235bab7-dc62-460f-ada4-6de957321cc5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:56:32 - INFO - [8235bab7-dc62-460f-ada4-6de957321cc5] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:56:32 - INFO - [8235bab7-dc62-460f-ada4-6de957321cc5] 30 frames saved to temp_videos/8235bab7-dc62-460f-ada4-6de957321cc5 +2025-08-22 05:56:32 - INFO - Prompt token length: 2330 +2025-08-22 05:56:39 - INFO - Tokens per second: 20.431106571476665, Peak GPU memory MB: 5842.375 +2025-08-22 05:56:39 - INFO - [8235bab7-dc62-460f-ada4-6de957321cc5] Inference time: 8.50 seconds, CPU usage: 34.1%, CPU core utilization: [12.1, 22.3, 14.3, 87.0] +2025-08-22 05:56:39 - INFO - [8235bab7-dc62-460f-ada4-6de957321cc5] Cleaned up temporary frame directory: temp_videos/8235bab7-dc62-460f-ada4-6de957321cc5 +2025-08-22 05:56:39 - INFO - [dd6002c8-8398-4d52-8fa4-4fd187b6b7d0] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:56:39 - INFO - [dd6002c8-8398-4d52-8fa4-4fd187b6b7d0] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:56:44 - INFO - [dd6002c8-8398-4d52-8fa4-4fd187b6b7d0] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:56:44 - INFO - [dd6002c8-8398-4d52-8fa4-4fd187b6b7d0] 30 frames saved to temp_videos/dd6002c8-8398-4d52-8fa4-4fd187b6b7d0 +2025-08-22 05:56:45 - INFO - Prompt token length: 2330 +2025-08-22 05:56:52 - INFO - Tokens per second: 20.501854828620406, Peak GPU memory MB: 5842.375 +2025-08-22 05:56:52 - INFO - [dd6002c8-8398-4d52-8fa4-4fd187b6b7d0] Inference time: 12.47 seconds, CPU usage: 53.8%, CPU core utilization: [67.7, 66.0, 39.3, 42.3] +2025-08-22 05:56:52 - INFO - [dd6002c8-8398-4d52-8fa4-4fd187b6b7d0] Cleaned up temporary frame directory: temp_videos/dd6002c8-8398-4d52-8fa4-4fd187b6b7d0 +2025-08-22 05:56:52 - INFO - [53d433da-8e89-473b-8094-b3749860494d] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:56:52 - INFO - [53d433da-8e89-473b-8094-b3749860494d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:56:53 - INFO - [53d433da-8e89-473b-8094-b3749860494d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:56:53 - INFO - [53d433da-8e89-473b-8094-b3749860494d] 30 frames saved to temp_videos/53d433da-8e89-473b-8094-b3749860494d +2025-08-22 05:56:53 - INFO - Prompt token length: 2330 +2025-08-22 05:56:56 - INFO - Tokens per second: 17.02981294121544, Peak GPU memory MB: 5842.375 +2025-08-22 05:56:56 - INFO - [53d433da-8e89-473b-8094-b3749860494d] Inference time: 3.99 seconds, CPU usage: 39.7%, CPU core utilization: [21.0, 25.6, 22.5, 89.7] +2025-08-22 05:56:56 - INFO - [53d433da-8e89-473b-8094-b3749860494d] Cleaned up temporary frame directory: temp_videos/53d433da-8e89-473b-8094-b3749860494d +2025-08-22 05:56:56 - INFO - [103151cf-21a1-443f-a384-6de8311204ef] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:56:56 - INFO - [103151cf-21a1-443f-a384-6de8311204ef] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:56:57 - INFO - [103151cf-21a1-443f-a384-6de8311204ef] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:56:57 - INFO - [103151cf-21a1-443f-a384-6de8311204ef] 30 frames saved to temp_videos/103151cf-21a1-443f-a384-6de8311204ef +2025-08-22 05:56:57 - INFO - Prompt token length: 2330 +2025-08-22 05:57:03 - INFO - Tokens per second: 20.469474642883824, Peak GPU memory MB: 5842.375 +2025-08-22 05:57:03 - INFO - [103151cf-21a1-443f-a384-6de8311204ef] Inference time: 7.35 seconds, CPU usage: 34.7%, CPU core utilization: [47.5, 53.6, 13.8, 23.8] +2025-08-22 05:57:03 - INFO - [103151cf-21a1-443f-a384-6de8311204ef] Cleaned up temporary frame directory: temp_videos/103151cf-21a1-443f-a384-6de8311204ef diff --git a/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_050641.log b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_050641.log new file mode 100644 index 0000000000000000000000000000000000000000..22ab92af66051aef19137cd7019be1f48b985052 --- /dev/null +++ b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_050641.log @@ -0,0 +1,36 @@ +2025-08-22 05:06:41 - INFO - Loading model: Qwen/Qwen2.5-VL-3B-Instruct-AWQ +2025-08-22 05:06:44 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-22 05:07:15 - INFO - Model loaded in 33.99 seconds +2025-08-22 05:07:15 - INFO - GPU Memory Usage after model load: 3250.55 MB +2025-08-22 05:07:15 - INFO - [8cd3d96d-c2de-4134-a862-35a3fcb1b555] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' 
+2025-08-22 05:07:15 - INFO - [8cd3d96d-c2de-4134-a862-35a3fcb1b555] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:07:16 - INFO - [8cd3d96d-c2de-4134-a862-35a3fcb1b555] Extracted 30 frames successfully. Saving to temporary files... +2025-08-22 05:07:16 - INFO - [8cd3d96d-c2de-4134-a862-35a3fcb1b555] 30 frames saved to temp_videos/8cd3d96d-c2de-4134-a862-35a3fcb1b555 +2025-08-22 05:07:16 - INFO - Prompt token length: 2330 +2025-08-22 05:07:25 - INFO - Tokens per second: 10.331790492190137, Peak GPU memory MB: 6678.375 +2025-08-22 05:07:25 - INFO - [8cd3d96d-c2de-4134-a862-35a3fcb1b555] Inference time: 10.04 seconds, CPU usage: 54.4%, CPU core utilization: [52.5, 52.4, 54.7, 58.0] +2025-08-22 05:07:25 - INFO - [8cd3d96d-c2de-4134-a862-35a3fcb1b555] Cleaned up temporary frame directory: temp_videos/8cd3d96d-c2de-4134-a862-35a3fcb1b555 +2025-08-22 05:07:25 - INFO - [5d471359-940e-4bd4-b596-f4a4624c793d] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:07:25 - INFO - [5d471359-940e-4bd4-b596-f4a4624c793d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:07:30 - INFO - [5d471359-940e-4bd4-b596-f4a4624c793d] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:07:30 - INFO - [5d471359-940e-4bd4-b596-f4a4624c793d] 30 frames saved to temp_videos/5d471359-940e-4bd4-b596-f4a4624c793d +2025-08-22 05:07:30 - INFO - Prompt token length: 2330 +2025-08-22 05:07:38 - INFO - Tokens per second: 11.553518987742192, Peak GPU memory MB: 6678.375 +2025-08-22 05:07:38 - INFO - [5d471359-940e-4bd4-b596-f4a4624c793d] Inference time: 13.80 seconds, CPU usage: 52.7%, CPU core utilization: [38.7, 95.7, 37.9, 38.4] +2025-08-22 05:07:38 - INFO - [5d471359-940e-4bd4-b596-f4a4624c793d] Cleaned up temporary frame directory: temp_videos/5d471359-940e-4bd4-b596-f4a4624c793d +2025-08-22 05:07:38 - INFO - [f4bebc66-b044-41e2-93f3-37223b475f3b] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:07:38 - INFO - [f4bebc66-b044-41e2-93f3-37223b475f3b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:07:39 - INFO - [f4bebc66-b044-41e2-93f3-37223b475f3b] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:07:39 - INFO - [f4bebc66-b044-41e2-93f3-37223b475f3b] 30 frames saved to temp_videos/f4bebc66-b044-41e2-93f3-37223b475f3b +2025-08-22 05:07:39 - INFO - Prompt token length: 2330 +2025-08-22 05:07:46 - INFO - Tokens per second: 11.406962184669279, Peak GPU memory MB: 6678.375 +2025-08-22 05:07:46 - INFO - [f4bebc66-b044-41e2-93f3-37223b475f3b] Inference time: 7.82 seconds, CPU usage: 36.0%, CPU core utilization: [63.3, 17.0, 45.0, 18.4] +2025-08-22 05:07:46 - INFO - [f4bebc66-b044-41e2-93f3-37223b475f3b] Cleaned up temporary frame directory: temp_videos/f4bebc66-b044-41e2-93f3-37223b475f3b +2025-08-22 05:07:46 - INFO - [4f0b4cd9-0146-48b2-9792-8e69c56ee3cf] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:07:46 - INFO - [4f0b4cd9-0146-48b2-9792-8e69c56ee3cf] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:07:47 - INFO - [4f0b4cd9-0146-48b2-9792-8e69c56ee3cf] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:07:47 - INFO - [4f0b4cd9-0146-48b2-9792-8e69c56ee3cf] 30 frames saved to temp_videos/4f0b4cd9-0146-48b2-9792-8e69c56ee3cf +2025-08-22 05:07:47 - INFO - Prompt token length: 2330 +2025-08-22 05:07:56 - INFO - Tokens per second: 11.491923072165466, Peak GPU memory MB: 6678.375 +2025-08-22 05:07:56 - INFO - [4f0b4cd9-0146-48b2-9792-8e69c56ee3cf] Inference time: 9.76 seconds, CPU usage: 33.5%, CPU core utilization: [11.3, 73.9, 12.9, 35.9] +2025-08-22 05:07:56 - INFO - [4f0b4cd9-0146-48b2-9792-8e69c56ee3cf] Cleaned up temporary frame directory: temp_videos/4f0b4cd9-0146-48b2-9792-8e69c56ee3cf diff --git a/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_051800.log b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_051800.log new file mode 100644 index 0000000000000000000000000000000000000000..5b0743c5d4907faa4b520d3e81ef746be5889bb7 --- /dev/null +++ b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_051800.log @@ -0,0 +1,31 @@ +2025-08-22 05:18:00 - INFO - Loading model: Qwen/Qwen2.5-VL-3B-Instruct-AWQ +2025-08-22 05:18:01 - ERROR - 处理失败: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils' (/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py) +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 129, in main + model = Qwen2VL(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/qwen.py", line 14, in __init__ + self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 306, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5141, in from_pretrained + hf_quantizer.preprocess_model( + File 
"/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 134, in replace_with_awq_linear + from awq.modules.linear.gemm import WQLinear_GEMM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/__init__.py", line 2, in + from awq.models.auto import AutoAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/__init__.py", line 1, in + from .mpt import MptAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/mpt.py", line 1, in + from .base import BaseAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/base.py", line 13, in + from transformers.modeling_utils import shard_checkpoint +ImportError: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils' (/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py) diff --git a/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_051822.log b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_051822.log new file mode 100644 index 0000000000000000000000000000000000000000..b0c818cc28c86e0abe6d934300f49fad23e94107 --- /dev/null +++ b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_051822.log @@ -0,0 +1,31 @@ +2025-08-22 05:18:22 - INFO - Loading model: Qwen/Qwen2.5-VL-3B-Instruct-AWQ +2025-08-22 05:18:23 - ERROR - 处理失败: cannot import name 
'shard_checkpoint' from 'transformers.modeling_utils' (/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py) +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 129, in main + model = Qwen2VL(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/qwen.py", line 14, in __init__ + self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 306, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5141, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 134, in replace_with_awq_linear + from awq.modules.linear.gemm import WQLinear_GEMM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/__init__.py", line 2, in + from awq.models.auto import AutoAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/__init__.py", line 1, in + from .mpt import MptAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/mpt.py", line 1, in + from .base import 
BaseAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/base.py", line 13, in + from transformers.modeling_utils import shard_checkpoint +ImportError: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils' (/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py) diff --git a/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_051912.log b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_051912.log new file mode 100644 index 0000000000000000000000000000000000000000..fa67914af64576f60b1a93a08e95866417c3352b --- /dev/null +++ b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_051912.log @@ -0,0 +1,31 @@ +2025-08-22 05:19:12 - INFO - Loading model: Qwen/Qwen2.5-VL-3B-Instruct-AWQ +2025-08-22 05:19:14 - ERROR - 处理失败: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils' (/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py) +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 129, in main + model = Qwen2VL(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/qwen.py", line 14, in __init__ + self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 317, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5028, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + 
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 134, in replace_with_awq_linear + from awq.modules.linear.gemm import WQLinear_GEMM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/__init__.py", line 2, in + from awq.models.auto import AutoAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/__init__.py", line 1, in + from .mpt import MptAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/mpt.py", line 1, in + from .base import BaseAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/base.py", line 13, in + from transformers.modeling_utils import shard_checkpoint +ImportError: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils' (/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py) diff --git a/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_052329.log b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_052329.log new file mode 100644 index 0000000000000000000000000000000000000000..d6a91dbb0e683ec83398b61c75fa8bfeda9fdee1 --- /dev/null +++ b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_052329.log @@ -0,0 +1,31 @@ +2025-08-22 05:23:29 - INFO - Loading model: Qwen/Qwen2.5-VL-3B-Instruct-AWQ +2025-08-22 05:23:30 - ERROR - 处理失败: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils' (/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py) +Traceback (most recent call last): + File 
"/mnt/data/xiuying/Code/test/infer.py", line 129, in main + model = Qwen2VL(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/qwen.py", line 14, in __init__ + self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 317, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5028, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 134, in replace_with_awq_linear + from awq.modules.linear.gemm import WQLinear_GEMM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/__init__.py", line 2, in + from awq.models.auto import AutoAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/__init__.py", line 1, in + from .mpt import MptAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/mpt.py", line 1, in + from .base import BaseAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/base.py", line 13, in + from transformers.modeling_utils import shard_checkpoint +ImportError: cannot 
import name 'shard_checkpoint' from 'transformers.modeling_utils' (/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py) diff --git a/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_052447.log b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_052447.log new file mode 100644 index 0000000000000000000000000000000000000000..75d93c5e00d6ebbe61f35afaf425ab14e0626605 --- /dev/null +++ b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_052447.log @@ -0,0 +1,31 @@ +2025-08-22 05:24:47 - INFO - Loading model: Qwen/Qwen2.5-VL-3B-Instruct-AWQ +2025-08-22 05:24:48 - ERROR - 处理失败: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils' (/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py) +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 129, in main + model = Qwen2VL(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/qwen.py", line 14, in __init__ + self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 315, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 4959, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = 
replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 134, in replace_with_awq_linear + from awq.modules.linear.gemm import WQLinear_GEMM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/__init__.py", line 2, in + from awq.models.auto import AutoAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/__init__.py", line 1, in + from .mpt import MptAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/mpt.py", line 1, in + from .base import BaseAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/base.py", line 13, in + from transformers.modeling_utils import shard_checkpoint +ImportError: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils' (/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py) diff --git a/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_053053.log b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_053053.log new file mode 100644 index 0000000000000000000000000000000000000000..6c388803b6e50f4d0d2784b3424538b4fc77ae52 --- /dev/null +++ b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_053053.log @@ -0,0 +1,31 @@ +2025-08-22 05:30:53 - INFO - Loading model: Qwen/Qwen2.5-VL-3B-Instruct-AWQ +2025-08-22 05:30:54 - ERROR - 处理失败: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils' (/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py) +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/test/infer.py", line 129, in main + model = Qwen2VL(args.model_path) + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/mnt/data/xiuying/Code/test/models/qwen.py", line 14, in __init__ + self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained( 
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 317, in _wrapper + return func(*args, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py", line 5028, in from_pretrained + hf_quantizer.preprocess_model( + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/base.py", line 225, in preprocess_model + return self._process_model_before_weight_loading(model, **kwargs) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/quantizers/quantizer_awq.py", line 121, in _process_model_before_weight_loading + model, has_been_replaced = replace_with_awq_linear( + ^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/integrations/awq.py", line 134, in replace_with_awq_linear + from awq.modules.linear.gemm import WQLinear_GEMM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/__init__.py", line 2, in + from awq.models.auto import AutoAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/__init__.py", line 1, in + from .mpt import MptAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/mpt.py", line 1, in + from .base import BaseAWQForCausalLM + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/awq/models/base.py", line 13, in + from transformers.modeling_utils import shard_checkpoint +ImportError: cannot import name 'shard_checkpoint' from 'transformers.modeling_utils' (/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/transformers/modeling_utils.py) diff --git a/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_053257.log 
b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_053257.log new file mode 100644 index 0000000000000000000000000000000000000000..13acf5f6eeea4adf9a4975c345c290afaa38b30d --- /dev/null +++ b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_053257.log @@ -0,0 +1,36 @@ +2025-08-22 05:32:57 - INFO - Loading model: Qwen/Qwen2.5-VL-3B-Instruct-AWQ +2025-08-22 05:33:00 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-22 05:33:08 - INFO - Model loaded in 10.72 seconds +2025-08-22 05:33:08 - INFO - GPU Memory Usage after model load: 3250.55 MB +2025-08-22 05:33:08 - INFO - [420e0f62-2b75-4084-9d43-f9b49d0fc06a] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:33:08 - INFO - [420e0f62-2b75-4084-9d43-f9b49d0fc06a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:33:09 - INFO - [420e0f62-2b75-4084-9d43-f9b49d0fc06a] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:33:09 - INFO - [420e0f62-2b75-4084-9d43-f9b49d0fc06a] 30 frames saved to temp_videos/420e0f62-2b75-4084-9d43-f9b49d0fc06a +2025-08-22 05:33:09 - INFO - Prompt token length: 2330 +2025-08-22 05:33:15 - INFO - Tokens per second: 14.911576721939884, Peak GPU memory MB: 6678.375 +2025-08-22 05:33:15 - INFO - [420e0f62-2b75-4084-9d43-f9b49d0fc06a] Inference time: 6.99 seconds, CPU usage: 64.2%, CPU core utilization: [61.3, 73.2, 64.4, 57.8] +2025-08-22 05:33:15 - INFO - [420e0f62-2b75-4084-9d43-f9b49d0fc06a] Cleaned up temporary frame directory: temp_videos/420e0f62-2b75-4084-9d43-f9b49d0fc06a +2025-08-22 05:33:15 - INFO - [2927cd25-59cf-4a51-a0f5-b6c7a3382630] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:33:15 - INFO - [2927cd25-59cf-4a51-a0f5-b6c7a3382630] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:33:23 - INFO - [2927cd25-59cf-4a51-a0f5-b6c7a3382630] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:33:23 - INFO - [2927cd25-59cf-4a51-a0f5-b6c7a3382630] 30 frames saved to temp_videos/2927cd25-59cf-4a51-a0f5-b6c7a3382630 +2025-08-22 05:33:23 - INFO - Prompt token length: 2330 +2025-08-22 05:33:30 - INFO - Tokens per second: 16.006104859630515, Peak GPU memory MB: 6678.375 +2025-08-22 05:33:30 - INFO - [2927cd25-59cf-4a51-a0f5-b6c7a3382630] Inference time: 15.54 seconds, CPU usage: 72.5%, CPU core utilization: [65.7, 66.6, 94.6, 62.9] +2025-08-22 05:33:30 - INFO - [2927cd25-59cf-4a51-a0f5-b6c7a3382630] Cleaned up temporary frame directory: temp_videos/2927cd25-59cf-4a51-a0f5-b6c7a3382630 +2025-08-22 05:33:30 - INFO - [84ae9443-0548-4aee-aaa3-ab8e2af789be] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:33:30 - INFO - [84ae9443-0548-4aee-aaa3-ab8e2af789be] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:33:31 - INFO - [84ae9443-0548-4aee-aaa3-ab8e2af789be] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:33:31 - INFO - [84ae9443-0548-4aee-aaa3-ab8e2af789be] 30 frames saved to temp_videos/84ae9443-0548-4aee-aaa3-ab8e2af789be +2025-08-22 05:33:31 - INFO - Prompt token length: 2330 +2025-08-22 05:33:37 - INFO - Tokens per second: 16.048521811500464, Peak GPU memory MB: 6678.375 +2025-08-22 05:33:37 - INFO - [84ae9443-0548-4aee-aaa3-ab8e2af789be] Inference time: 6.63 seconds, CPU usage: 35.1%, CPU core utilization: [17.8, 17.3, 91.5, 13.4] +2025-08-22 05:33:37 - INFO - [84ae9443-0548-4aee-aaa3-ab8e2af789be] Cleaned up temporary frame directory: temp_videos/84ae9443-0548-4aee-aaa3-ab8e2af789be +2025-08-22 05:33:37 - INFO - [e430b9af-6192-491b-8854-7fb9a375b31c] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:33:37 - INFO - [e430b9af-6192-491b-8854-7fb9a375b31c] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:33:38 - INFO - [e430b9af-6192-491b-8854-7fb9a375b31c] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:33:38 - INFO - [e430b9af-6192-491b-8854-7fb9a375b31c] 30 frames saved to temp_videos/e430b9af-6192-491b-8854-7fb9a375b31c +2025-08-22 05:33:38 - INFO - Prompt token length: 2330 +2025-08-22 05:33:45 - INFO - Tokens per second: 16.138695170908093, Peak GPU memory MB: 6678.375 +2025-08-22 05:33:45 - INFO - [e430b9af-6192-491b-8854-7fb9a375b31c] Inference time: 8.05 seconds, CPU usage: 34.1%, CPU core utilization: [38.0, 14.0, 72.5, 11.7] +2025-08-22 05:33:45 - INFO - [e430b9af-6192-491b-8854-7fb9a375b31c] Cleaned up temporary frame directory: temp_videos/e430b9af-6192-491b-8854-7fb9a375b31c diff --git a/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_054107.log b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_054107.log new file mode 100644 index 0000000000000000000000000000000000000000..2f344f22cb5d610e595b804f0403d7b0c3e73585 --- /dev/null +++ b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_054107.log @@ -0,0 +1,36 @@ +2025-08-22 05:41:07 - INFO - Loading model: Qwen/Qwen2.5-VL-3B-Instruct-AWQ +2025-08-22 05:41:12 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-22 05:41:42 - INFO - Model loaded in 34.82 seconds +2025-08-22 05:41:42 - INFO - GPU Memory Usage after model load: 3250.55 MB +2025-08-22 05:41:42 - INFO - [145be8a8-9764-4818-a9b2-684ef006547b] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' 
+2025-08-22 05:41:42 - INFO - [145be8a8-9764-4818-a9b2-684ef006547b] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:41:43 - INFO - [145be8a8-9764-4818-a9b2-684ef006547b] Extracted 30 frames successfully. Saving to temporary files... +2025-08-22 05:41:43 - INFO - [145be8a8-9764-4818-a9b2-684ef006547b] 30 frames saved to temp_videos/145be8a8-9764-4818-a9b2-684ef006547b +2025-08-22 05:41:43 - INFO - Prompt token length: 2330 +2025-08-22 05:41:49 - INFO - Tokens per second: 15.592137334361555, Peak GPU memory MB: 6678.375 +2025-08-22 05:41:49 - INFO - [145be8a8-9764-4818-a9b2-684ef006547b] Inference time: 6.37 seconds, CPU usage: 55.1%, CPU core utilization: [53.5, 53.5, 58.4, 54.9] +2025-08-22 05:41:49 - INFO - [145be8a8-9764-4818-a9b2-684ef006547b] Cleaned up temporary frame directory: temp_videos/145be8a8-9764-4818-a9b2-684ef006547b +2025-08-22 05:41:49 - INFO - [859ce3d5-f81a-43a0-afe3-d262192d6b84] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:41:49 - INFO - [859ce3d5-f81a-43a0-afe3-d262192d6b84] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:41:54 - INFO - [859ce3d5-f81a-43a0-afe3-d262192d6b84] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:41:54 - INFO - [859ce3d5-f81a-43a0-afe3-d262192d6b84] 30 frames saved to temp_videos/859ce3d5-f81a-43a0-afe3-d262192d6b84 +2025-08-22 05:41:54 - INFO - Prompt token length: 2330 +2025-08-22 05:42:00 - INFO - Tokens per second: 16.23932857727457, Peak GPU memory MB: 6678.375 +2025-08-22 05:42:00 - INFO - [859ce3d5-f81a-43a0-afe3-d262192d6b84] Inference time: 11.90 seconds, CPU usage: 53.6%, CPU core utilization: [53.6, 66.8, 41.3, 52.6] +2025-08-22 05:42:00 - INFO - [859ce3d5-f81a-43a0-afe3-d262192d6b84] Cleaned up temporary frame directory: temp_videos/859ce3d5-f81a-43a0-afe3-d262192d6b84 +2025-08-22 05:42:00 - INFO - [6737a709-d602-44c4-9693-a777732e9752] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:42:00 - INFO - [6737a709-d602-44c4-9693-a777732e9752] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:42:01 - INFO - [6737a709-d602-44c4-9693-a777732e9752] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:42:01 - INFO - [6737a709-d602-44c4-9693-a777732e9752] 30 frames saved to temp_videos/6737a709-d602-44c4-9693-a777732e9752 +2025-08-22 05:42:02 - INFO - Prompt token length: 2330 +2025-08-22 05:42:07 - INFO - Tokens per second: 16.156670595758122, Peak GPU memory MB: 6678.375 +2025-08-22 05:42:07 - INFO - [6737a709-d602-44c4-9693-a777732e9752] Inference time: 6.72 seconds, CPU usage: 35.3%, CPU core utilization: [14.6, 84.2, 14.5, 27.5] +2025-08-22 05:42:07 - INFO - [6737a709-d602-44c4-9693-a777732e9752] Cleaned up temporary frame directory: temp_videos/6737a709-d602-44c4-9693-a777732e9752 +2025-08-22 05:42:07 - INFO - [726a0f65-094a-4f6f-a0ff-4d139e96ac66] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:42:07 - INFO - [726a0f65-094a-4f6f-a0ff-4d139e96ac66] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:42:08 - INFO - [726a0f65-094a-4f6f-a0ff-4d139e96ac66] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:42:08 - INFO - [726a0f65-094a-4f6f-a0ff-4d139e96ac66] 30 frames saved to temp_videos/726a0f65-094a-4f6f-a0ff-4d139e96ac66 +2025-08-22 05:42:08 - INFO - Prompt token length: 2330 +2025-08-22 05:42:15 - INFO - Tokens per second: 16.20737149196603, Peak GPU memory MB: 6678.375 +2025-08-22 05:42:15 - INFO - [726a0f65-094a-4f6f-a0ff-4d139e96ac66] Inference time: 8.09 seconds, CPU usage: 33.9%, CPU core utilization: [17.3, 20.2, 42.4, 55.7] +2025-08-22 05:42:15 - INFO - [726a0f65-094a-4f6f-a0ff-4d139e96ac66] Cleaned up temporary frame directory: temp_videos/726a0f65-094a-4f6f-a0ff-4d139e96ac66 diff --git a/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_055712.log b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_055712.log new file mode 100644 index 0000000000000000000000000000000000000000..2c1a17501477b7750b4ea69d4e67f9103a026565 --- /dev/null +++ b/Direct_Transformers/logs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_055712.log @@ -0,0 +1,44 @@ +2025-08-22 05:57:12 - INFO - Loading model: Qwen/Qwen2.5-VL-3B-Instruct-AWQ +2025-08-22 05:57:15 - INFO - We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. You can set `max_memory` in to a higher value to use more memory (at your own risk). +2025-08-22 05:57:46 - INFO - Model loaded in 33.69 seconds +2025-08-22 05:57:46 - INFO - GPU Memory Usage after model load: 3250.55 MB +2025-08-22 05:57:46 - INFO - [b1f563ec-1a2c-4af7-bf88-7e54521bd16f] Processing video: 'videos/sample2.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' 
+2025-08-22 05:57:46 - INFO - [b1f563ec-1a2c-4af7-bf88-7e54521bd16f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:57:50 - INFO - [b1f563ec-1a2c-4af7-bf88-7e54521bd16f] Extracted 30 frames successfully. Saving to temporary files... +2025-08-22 05:57:50 - INFO - [b1f563ec-1a2c-4af7-bf88-7e54521bd16f] 30 frames saved to temp_videos/b1f563ec-1a2c-4af7-bf88-7e54521bd16f +2025-08-22 05:57:50 - INFO - Prompt token length: 2330 +2025-08-22 05:57:55 - INFO - Tokens per second: 16.190728175434586, Peak GPU memory MB: 6678.375 +2025-08-22 05:57:55 - INFO - [b1f563ec-1a2c-4af7-bf88-7e54521bd16f] Inference time: 9.97 seconds, CPU usage: 43.4%, CPU core utilization: [47.7, 39.3, 46.6, 40.2] +2025-08-22 05:57:55 - INFO - [b1f563ec-1a2c-4af7-bf88-7e54521bd16f] Cleaned up temporary frame directory: temp_videos/b1f563ec-1a2c-4af7-bf88-7e54521bd16f +2025-08-22 05:57:55 - INFO - [f443de0e-ebd6-42c5-a8f3-428e02b21575] Processing video: 'videos/sample1_rotated_270.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:57:55 - INFO - [f443de0e-ebd6-42c5-a8f3-428e02b21575] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:57:56 - INFO - [f443de0e-ebd6-42c5-a8f3-428e02b21575] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:57:56 - INFO - [f443de0e-ebd6-42c5-a8f3-428e02b21575] 30 frames saved to temp_videos/f443de0e-ebd6-42c5-a8f3-428e02b21575 +2025-08-22 05:57:57 - INFO - Prompt token length: 2330 +2025-08-22 05:58:01 - INFO - Tokens per second: 15.958396581899965, Peak GPU memory MB: 6678.375 +2025-08-22 05:58:01 - INFO - [f443de0e-ebd6-42c5-a8f3-428e02b21575] Inference time: 5.56 seconds, CPU usage: 36.6%, CPU core utilization: [15.3, 52.5, 18.4, 60.0] +2025-08-22 05:58:01 - INFO - [f443de0e-ebd6-42c5-a8f3-428e02b21575] Cleaned up temporary frame directory: temp_videos/f443de0e-ebd6-42c5-a8f3-428e02b21575 +2025-08-22 05:58:01 - INFO - [b6079f9c-278c-459a-a37e-332d5ffdd6fd] Processing video: 'videos/sample1_raw.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:58:01 - INFO - [b6079f9c-278c-459a-a37e-332d5ffdd6fd] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:58:06 - INFO - [b6079f9c-278c-459a-a37e-332d5ffdd6fd] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:58:06 - INFO - [b6079f9c-278c-459a-a37e-332d5ffdd6fd] 30 frames saved to temp_videos/b6079f9c-278c-459a-a37e-332d5ffdd6fd +2025-08-22 05:58:06 - INFO - Prompt token length: 2330 +2025-08-22 05:58:13 - INFO - Tokens per second: 15.934521405551505, Peak GPU memory MB: 6678.375 +2025-08-22 05:58:13 - INFO - [b6079f9c-278c-459a-a37e-332d5ffdd6fd] Inference time: 11.96 seconds, CPU usage: 53.3%, CPU core utilization: [38.6, 41.4, 39.9, 93.3] +2025-08-22 05:58:13 - INFO - [b6079f9c-278c-459a-a37e-332d5ffdd6fd] Cleaned up temporary frame directory: temp_videos/b6079f9c-278c-459a-a37e-332d5ffdd6fd +2025-08-22 05:58:13 - INFO - [7480d34f-ab80-40de-bd90-e1b1e80ba723] Processing video: 'videos/sample1_rotated_180.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:58:13 - INFO - [7480d34f-ab80-40de-bd90-e1b1e80ba723] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:58:14 - INFO - [7480d34f-ab80-40de-bd90-e1b1e80ba723] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:58:14 - INFO - [7480d34f-ab80-40de-bd90-e1b1e80ba723] 30 frames saved to temp_videos/7480d34f-ab80-40de-bd90-e1b1e80ba723 +2025-08-22 05:58:14 - INFO - Prompt token length: 2330 +2025-08-22 05:58:20 - INFO - Tokens per second: 16.11702401546876, Peak GPU memory MB: 6678.375 +2025-08-22 05:58:20 - INFO - [7480d34f-ab80-40de-bd90-e1b1e80ba723] Inference time: 6.65 seconds, CPU usage: 35.0%, CPU core utilization: [13.4, 81.1, 15.8, 29.8] +2025-08-22 05:58:20 - INFO - [7480d34f-ab80-40de-bd90-e1b1e80ba723] Cleaned up temporary frame directory: temp_videos/7480d34f-ab80-40de-bd90-e1b1e80ba723 +2025-08-22 05:58:20 - INFO - [02f2404d-bf6d-4be9-aa69-471222e4550f] Processing video: 'videos/sample1_rotated_90.mp4', Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves.' +2025-08-22 05:58:20 - INFO - [02f2404d-bf6d-4be9-aa69-471222e4550f] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 05:58:21 - INFO - [02f2404d-bf6d-4be9-aa69-471222e4550f] Extracted 30 frames successfully. Saving to temporary files... 
+2025-08-22 05:58:21 - INFO - [02f2404d-bf6d-4be9-aa69-471222e4550f] 30 frames saved to temp_videos/02f2404d-bf6d-4be9-aa69-471222e4550f +2025-08-22 05:58:21 - INFO - Prompt token length: 2330 +2025-08-22 05:58:28 - INFO - Tokens per second: 15.820068362733391, Peak GPU memory MB: 6678.375 +2025-08-22 05:58:28 - INFO - [02f2404d-bf6d-4be9-aa69-471222e4550f] Inference time: 8.15 seconds, CPU usage: 34.6%, CPU core utilization: [13.5, 39.0, 14.4, 71.4] +2025-08-22 05:58:28 - INFO - [02f2404d-bf6d-4be9-aa69-471222e4550f] Cleaned up temporary frame directory: temp_videos/02f2404d-bf6d-4be9-aa69-471222e4550f diff --git a/Direct_Transformers/models/__pycache__/gemma.cpython-311.pyc b/Direct_Transformers/models/__pycache__/gemma.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1b2abb3f165c968abc9e2205e425d0dc00037e8d Binary files /dev/null and b/Direct_Transformers/models/__pycache__/gemma.cpython-311.pyc differ diff --git a/Direct_Transformers/models/__pycache__/lfm.cpython-311.pyc b/Direct_Transformers/models/__pycache__/lfm.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cb02c4cd27f9fd812b1a58ee59bd7d7961b5ff14 Binary files /dev/null and b/Direct_Transformers/models/__pycache__/lfm.cpython-311.pyc differ diff --git a/Direct_Transformers/models/__pycache__/minicpm.cpython-311.pyc b/Direct_Transformers/models/__pycache__/minicpm.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0924b569a2c31dfb19dd32ce40459c1e11ca74ba Binary files /dev/null and b/Direct_Transformers/models/__pycache__/minicpm.cpython-311.pyc differ diff --git a/Direct_Transformers/models/__pycache__/qwen.cpython-311.pyc b/Direct_Transformers/models/__pycache__/qwen.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2d85f11692af2ec1e2568ff941ad70891d0635b9 Binary files /dev/null and b/Direct_Transformers/models/__pycache__/qwen.cpython-311.pyc differ diff --git 
a/Direct_Transformers/models/gemma.py b/Direct_Transformers/models/gemma.py new file mode 100644 index 0000000000000000000000000000000000000000..149d2a8d338d7542fee88b1e00a77f7c42972e8a --- /dev/null +++ b/Direct_Transformers/models/gemma.py @@ -0,0 +1,106 @@ +# pip install accelerate + +from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer +from PIL import Image +import requests +import torch +from threading import Thread +import logging +import time +import pynvml + +class Gemma: + def __init__(self, model_id): + self.model_id = model_id + self.model = Gemma3ForConditionalGeneration.from_pretrained( + model_id, device_map="auto", torch_dtype=torch.bfloat16 + ).eval() + self.processor = AutoProcessor.from_pretrained(model_id) + + self.handle = None + if torch.cuda.is_available(): + try: + pynvml.nvmlInit() + device_id = next(self.model.parameters()).device.index + self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_id) + except Exception as e: + logging.error(f"Failed to initialize NVML: {e}") + + def __del__(self): + if self.handle: + try: + pynvml.nvmlShutdown() + except: + pass + + def generate(self, video, prompt): + start_time = time.time() + + messages = [ + { + "role": "system", + "content": [{"type": "text", "text": "You are a helpful assistant."}] + }, + + { + "role": "user", + "content": [ + {"type": "text", "text": prompt}] + } + ] + + for image in video: + messages[1]["content"].append({"type": "image", "image": image}) + + print(messages) + inputs = self.processor.apply_chat_template( + messages, add_generation_prompt=True, tokenize=True, + return_dict=True, return_tensors="pt" + ).to(self.model.device) + + logging.info(f"Prompt token length: {len(inputs.input_ids[0])}") + + streamer = TextIteratorStreamer(self.processor, skip_prompt=True, skip_special_tokens=True) + + generation_kwargs = dict( + **inputs, + streamer=streamer, + max_new_tokens=512 + ) + + thread = Thread(target=self.model.generate, 
kwargs=generation_kwargs) + thread.start() + + full_response = "" + print("Response: ", end="") + first_token_time = None + for new_text in streamer: + if first_token_time is None: + first_token_time = time.time() + full_response += new_text + print(new_text, end="", flush=True) + print() + thread.join() + + end_time = time.time() + + if first_token_time is not None: + generation_time = end_time - first_token_time + else: + generation_time = 0 + + num_generated_tokens = len(self.processor.tokenizer(full_response).input_ids) + tokens_per_second = num_generated_tokens / generation_time if generation_time > 0 else 0 + + peak_memory_mb = 0 + if self.handle: + mem_info = pynvml.nvmlDeviceGetMemoryInfo(self.handle) + peak_memory_mb = mem_info.used / (1024 * 1024) + + return { + "response": full_response, + "tokens_per_second": tokens_per_second, + "peak_gpu_memory_mb": peak_memory_mb, + "num_generated_tokens": num_generated_tokens, + } + diff --git a/Direct_Transformers/models/lfm.py b/Direct_Transformers/models/lfm.py new file mode 100644 index 0000000000000000000000000000000000000000..23a12e3c8d77c1a5c11eb0acc33320076159bfdf --- /dev/null +++ b/Direct_Transformers/models/lfm.py @@ -0,0 +1,104 @@ +from transformers import AutoProcessor, AutoModelForImageTextToText, TextIteratorStreamer +from transformers.image_utils import load_image +from threading import Thread +import logging +import torch +import time +import pynvml + +class LFM2: + def __init__(self, model_id): + self.model_id = model_id + self.model = AutoModelForImageTextToText.from_pretrained( + model_id, + device_map="auto", + torch_dtype=torch.bfloat16, + trust_remote_code=True + ) + self.processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True) + + self.handle = None + if torch.cuda.is_available(): + try: + pynvml.nvmlInit() + device_id = next(self.model.parameters()).device.index + self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_id) + except Exception as e: + logging.error(f"Failed 
to initialize NVML: {e}") + + def __del__(self): + if self.handle: + try: + pynvml.nvmlShutdown() + except: + pass + + def generate(self, video, prompt): + start_time = time.time() + conversation = [ + { + "role": "user", + "content": [ + {"type": "text", "text": prompt}, + ], + }, + ] + # Assume video is a list of image paths + images = [load_image(image_path) for image_path in video] + for image in images: + conversation[0]["content"].append({"type": "image", "image": image}) + + # Generate Answer + inputs = self.processor.apply_chat_template( + conversation, + add_generation_prompt=True, + return_tensors="pt", + return_dict=True, + tokenize=True, + ).to(self.model.device) + + logging.info(f"Prompt token length: {len(inputs.input_ids[0])}") + streamer = TextIteratorStreamer(self.processor, skip_prompt=True, skip_special_tokens=True) + + generation_kwargs = dict( + **inputs, + streamer=streamer, + max_new_tokens=512 + ) + + thread = Thread(target=self.model.generate, kwargs=generation_kwargs) + thread.start() + + full_response = "" + print("Response: ", end="") + first_token_time = None + for new_text in streamer: + if first_token_time is None: + first_token_time = time.time() + full_response += new_text + print(new_text, end="", flush=True) + print() + thread.join() + + end_time = time.time() + + if first_token_time is not None: + generation_time = end_time - first_token_time + else: + generation_time = 0 + + num_generated_tokens = len(self.processor.tokenizer(full_response).input_ids) + tokens_per_second = num_generated_tokens / generation_time if generation_time > 0 else 0 + + peak_memory_mb = 0 + if self.handle: + mem_info = pynvml.nvmlDeviceGetMemoryInfo(self.handle) + peak_memory_mb = mem_info.used / (1024 * 1024) + + return { + "response": full_response, + "tokens_per_second": tokens_per_second, + "peak_gpu_memory_mb": peak_memory_mb, + "num_generated_tokens": num_generated_tokens, + } + diff --git a/Direct_Transformers/models/minicpm.py 
b/Direct_Transformers/models/minicpm.py new file mode 100644 index 0000000000000000000000000000000000000000..776048870f73776b0546749c4a976ef9fc12e40e --- /dev/null +++ b/Direct_Transformers/models/minicpm.py @@ -0,0 +1,85 @@ +from PIL import Image +import torch +from transformers import AutoModel, AutoTokenizer, TextIteratorStreamer +from transformers.image_utils import load_image +from threading import Thread +import logging +import time +import pynvml + +class MiniCPM: + def __init__(self, model_id): + self.model_id = model_id + self.model = AutoModel.from_pretrained( + model_id, + trust_remote_code=True, + attn_implementation='sdpa', + torch_dtype=torch.bfloat16 + ) + self.model = self.model.eval().cuda() + self.tokenizer = AutoTokenizer.from_pretrained( + model_id, trust_remote_code=True + ) + + self.handle = None + if torch.cuda.is_available(): + try: + pynvml.nvmlInit() + device_id = next(self.model.parameters()).device.index + self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_id) + except Exception as e: + logging.error(f"Failed to initialize NVML: {e}") + + def __del__(self): + if self.handle: + try: + pynvml.nvmlShutdown() + except: + pass + + def generate(self, video, prompt): + start_time = time.time() + + images = [Image.open(frame).convert('RGB') for frame in video] + content = images + [prompt] + msgs = [{'role': 'user', 'content': content}] + + # MiniCPM's chat method handles streaming internally + res = self.model.chat( + image=None, + msgs=msgs, + tokenizer=self.tokenizer, + stream=True + ) + + full_response = "" + print("Response: ", end="") + first_token_time = None + for new_text in res: + if first_token_time is None: + first_token_time = time.time() + full_response += new_text + print(new_text, end="", flush=True) + print() + + end_time = time.time() + + if first_token_time is not None: + generation_time = end_time - first_token_time + else: + generation_time = 0 + + num_generated_tokens = len(self.tokenizer(full_response).input_ids) + 
tokens_per_second = num_generated_tokens / generation_time if generation_time > 0 else 0 + + peak_memory_mb = 0 + if self.handle: + mem_info = pynvml.nvmlDeviceGetMemoryInfo(self.handle) + peak_memory_mb = mem_info.used / (1024 * 1024) + + return { + "response": full_response, + "tokens_per_second": tokens_per_second, + "peak_gpu_memory_mb": peak_memory_mb, + "num_generated_tokens": num_generated_tokens, + } diff --git a/Direct_Transformers/models/qwen.py b/Direct_Transformers/models/qwen.py new file mode 100644 index 0000000000000000000000000000000000000000..a0e12cac2c55a1d22e460316d7f5cff3416a61a1 --- /dev/null +++ b/Direct_Transformers/models/qwen.py @@ -0,0 +1,115 @@ +from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor, Qwen2_5_VLForConditionalGeneration +from qwen_vl_utils import process_vision_info +from transformers import TextIteratorStreamer +from threading import Thread +import logging +import torch +import time +import pynvml + +class Qwen2VL: + def __init__(self, model_id): + self.model_id = model_id + if "2.5" in model_id: + self.model = Qwen2_5_VLForConditionalGeneration.from_pretrained( + model_id, torch_dtype="float16", device_map="auto" + ) + else: + self.model = Qwen2VLForConditionalGeneration.from_pretrained( + model_id, torch_dtype="float16", device_map="auto" + ) + self.processor = AutoProcessor.from_pretrained(model_id) + + self.handle = None + if torch.cuda.is_available(): + try: + pynvml.nvmlInit() + device_id = next(self.model.parameters()).device.index + self.handle = pynvml.nvmlDeviceGetHandleByIndex(device_id) + except Exception as e: + logging.error(f"Failed to initialize NVML: {e}") + + def __del__(self): + if hasattr(self, 'handle') and self.handle: + try: + pynvml.nvmlShutdown() + except: + pass + + def generate(self, video, prompt): + + start_time = time.time() + + # Preparation for inference + video_paths = [f"file://{path}" for path in video] + messages = [ + { + "role": "user", + "content": [ + 
{ + "type": "video", + "video": video_paths, + "resized_height": 280, + "resized_width": 420, + }, + {"type": "text", "text": prompt}, + ], + } + ] + text = self.processor.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + + image_inputs, video_inputs = process_vision_info(messages) + inputs = self.processor( + text=[text], + images=image_inputs, + videos=video_inputs, + padding=True, + return_tensors="pt", + ) + inputs = inputs.to("cuda") + logging.info(f"Prompt token length: {len(inputs.input_ids[0])}") + streamer = TextIteratorStreamer(self.processor, skip_prompt=True, skip_special_tokens=True) + + generation_kwargs = dict( + **inputs, + streamer=streamer, + max_new_tokens=256 + ) + + thread = Thread(target=self.model.generate, kwargs=generation_kwargs) + thread.start() + full_response = "" + print("Response: ", end="") + first_token_time = None + for new_text in streamer: + if first_token_time is None: + first_token_time = time.time() + full_response += new_text + print(new_text, end="", flush=True) + print() + thread.join() + + end_time = time.time() + + if first_token_time is not None: + generation_time = end_time - first_token_time + else: + generation_time = 0 + + num_generated_tokens = len(self.processor.tokenizer(full_response).input_ids) + tokens_per_second = num_generated_tokens / generation_time if generation_time > 0 else 0 + + peak_memory_mb = 0 + if self.handle: + mem_info = pynvml.nvmlDeviceGetMemoryInfo(self.handle) + peak_memory_mb = mem_info.used / (1024 * 1024) + + return { + "response": full_response, + "tokens_per_second": tokens_per_second, + "peak_gpu_memory_mb": peak_memory_mb, + "num_generated_tokens": num_generated_tokens, + } + diff --git a/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_044952.json b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_044952.json new file mode 100644 index 0000000000000000000000000000000000000000..a9443368b20dea3e34ed86bf8db80c5ce367cc61 --- /dev/null +++ 
b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_044952.json @@ -0,0 +1,62 @@ +{ + "sample1_rotated_270.mp4": { + "response": "A man enters the convenience store and walks to a counter. He sits down on the floor, then stands up briefly before sitting back down again. The scene is static for some time with no other significant actions taking place. A person wearing blue enters the frame from behind the counter and interacts with the seated man by placing something in his hand.", + "tokens_per_second": 5.480411872376593, + "peak_gpu_memory_mb": 9236.375, + "num_generated_tokens": 69, + "inference_time": 33.08904457092285, + "cpu_usage": 48.6, + "cpu_core_utilization": [ + 46.3, + 44.5, + 50.7, + 52.9 + ], + "request_id": "d017fd76-3fd3-43ac-96a2-634c37d06506" + }, + "sample1_raw.mp4": { + "response": "The video captures a sequence of events in a convenience store. A man wearing blue clothing is seen working behind the counter, handling various tasks such as taking an item from a customer and interacting with another employee at the register. Another individual enters the scene carrying items including a yellow box and some bags, places them on the floor near the entrance, then proceeds to interact with the cashier before exiting through the door. The background remains consistent throughout the clip, showing shelves stocked with products and a tiled floor.", + "tokens_per_second": 7.228105259193224, + "peak_gpu_memory_mb": 9236.375, + "num_generated_tokens": 104, + "inference_time": 32.255831241607666, + "cpu_usage": 37.7, + "cpu_core_utilization": [ + 49.7, + 33.2, + 23.0, + 45.0 + ], + "request_id": "39153060-6451-46d2-bd79-3907b25078cd" + }, + "sample1_rotated_180.mp4": { + "response": "A man in a white shirt walks into the frame. He sits down at the counter and begins to use his phone. Another person enters the scene, wearing blue clothing. 
The first man hands something over to this second individual.", + "tokens_per_second": 4.33591342429363, + "peak_gpu_memory_mb": 9236.375, + "num_generated_tokens": 46, + "inference_time": 24.36067843437195, + "cpu_usage": 32.9, + "cpu_core_utilization": [ + 41.7, + 14.4, + 17.5, + 58.0 + ], + "request_id": "17f20038-cd68-4523-9ca8-e568483c3597" + }, + "sample1_rotated_90.mp4": { + "response": "The video captures a scene inside what appears to be a convenience store or similar retail environment. The main focus is on the counter area where various items are displayed, including hats and other merchandise. Throughout the sequence of frames provided, there isn't significant change in terms of actions taking place at the counter except for one individual who enters the frame from the top side. This person seems to interact with something out of view possibly related to their work at this location. No other people appear actively engaged in tasks within these particular moments captured by the footage; they might simply pass through or stand nearby but do not participate directly in any visible transactions or activities around the display areas.", + "tokens_per_second": 8.020485206497014, + "peak_gpu_memory_mb": 9236.375, + "num_generated_tokens": 134, + "inference_time": 31.392521619796753, + "cpu_usage": 42.2, + "cpu_core_utilization": [ + 23.0, + 30.6, + 48.4, + 66.5 + ], + "request_id": "00745cac-3194-4425-9e97-9a6df15d32b5" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_055248.json b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_055248.json new file mode 100644 index 0000000000000000000000000000000000000000..25ca529b79e04dccfd78eb4cb5084bc463b49bfa --- /dev/null +++ b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_055248.json @@ -0,0 +1,77 @@ +{ + "sample2.mp4": { + "response": "Customers are making purchases and interacting with the cashier at a convenience store. 
The scene unfolds over several seconds, capturing various interactions and activities within the store. Initially, two customers approach the counter to make their transactions. One customer is dressed in black while the other wears light-colored attire. As they engage with the cashier, another individual wearing blue appears in the background.\n\nThroughout the sequence, different individuals enter or exit the frame: a man in a gray shirt carrying a bag, followed by an older gentleman. These patrons interact briefly before moving on. A new figure in beige enters from behind the shelves of snacks but exits shortly afterward. The person who entered last leaves as well, leaving one patron remaining near the register area.\n\nThe video captures routine retail activity—customers placing items into bags, receiving change back, scanning barcodes, paying for goods using credit cards, retrieving products like water bottles, and handling small packages such as candy bars. The transaction process concludes when all parties have left the vicinity of the checkout area except for the staff member still engaged in closing down operations.", + "tokens_per_second": 9.627250655320038, + "peak_gpu_memory_mb": 9236.375, + "num_generated_tokens": 230, + "inference_time": 44.06033158302307, + "cpu_usage": 23.0, + "cpu_core_utilization": [ + 16.7, + 19.6, + 22.1, + 33.5 + ], + "request_id": "d42855a3-8e48-4035-a53f-8de3090c06ed" + }, + "sample1_rotated_270.mp4": { + "response": "Customer comes in, looks around. Employee behind the counter picks up a bag from floor and puts it on table. 
Customer takes out money to pay for his purchase.", + "tokens_per_second": 3.57929410621367, + "peak_gpu_memory_mb": 9236.375, + "num_generated_tokens": 34, + "inference_time": 23.264695167541504, + "cpu_usage": 29.8, + "cpu_core_utilization": [ + 27.5, + 64.4, + 7.3, + 19.8 + ], + "request_id": "ac79ae0e-7b1a-40bf-b620-f6c90f5770d6" + }, + "sample1_raw.mp4": { + "response": "The video depicts a convenience store scene with various activities occurring simultaneously. A man in blue is interacting at the cash register, while another individual wearing a light-colored shirt and carrying a backpack stands nearby on the left side of the frame. In the background, there's an escalator moving upwards alongside other shoppers traversing it. The camera angle offers a comprehensive view from above into the retail space where numerous items are displayed on shelves lining both sides of the counter area occupied by the staff member handling transactions.", + "tokens_per_second": 7.079099419479299, + "peak_gpu_memory_mb": 9236.375, + "num_generated_tokens": 103, + "inference_time": 32.53696012496948, + "cpu_usage": 37.9, + "cpu_core_utilization": [ + 18.0, + 24.0, + 55.7, + 54.0 + ], + "request_id": "304d70b6-ebf9-4d42-b6c7-d5933fe8b991" + }, + "sample1_rotated_180.mp4": { + "response": "Customer walks in, looks at items on the shelves. Employee engages with customer and shows her what she wants to buy. Customer checks price tag of item and decides whether or not it is worth buying. Employee scans barcode for purchase transaction.", + "tokens_per_second": 4.668429203015315, + "peak_gpu_memory_mb": 9236.375, + "num_generated_tokens": 50, + "inference_time": 24.504019260406494, + "cpu_usage": 29.9, + "cpu_core_utilization": [ + 36.4, + 27.0, + 24.5, + 31.9 + ], + "request_id": "ef3c69d1-c1a7-425a-a488-809429a78f15" + }, + "sample1_rotated_90.mp4": { + "response": "In the convenience store, a man in white shirt and blue shorts is seen inspecting various items on display. 
He browses through different products, occasionally picking up or moving them to examine closer. The video captures his actions as he navigates through the merchandise with focused attention.", + "tokens_per_second": 5.1673857196625175, + "peak_gpu_memory_mb": 9236.375, + "num_generated_tokens": 58, + "inference_time": 25.009957790374756, + "cpu_usage": 30.0, + "cpu_core_utilization": [ + 53.4, + 44.7, + 12.5, + 9.1 + ], + "request_id": "d836cd40-b5dc-4621-8bea-ae0762c669fa" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_110321.json b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_110321.json new file mode 100644 index 0000000000000000000000000000000000000000..0ffc2b8c43f078af990e14498a96688180f99c35 --- /dev/null +++ b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_110321.json @@ -0,0 +1,77 @@ +{ + "sample2.mp4": { + "response": "1. A person in a gray hoodie interacts with the counter, possibly making a purchase or asking for assistance.\n2. Another individual wearing black reaches out towards items on the counter.\n3. The scene includes various snacks displayed prominently in the background.\n4. An employee at the cash register appears to be handling transactions and interacting with customers.\n5. Throughout the sequence, different individuals are seen engaging with each other and the store environment, including browsing products and moving around within the convenience store setting.", + "tokens_per_second": 11.493479553556993, + "peak_gpu_memory_mb": 5852.375, + "num_generated_tokens": 106, + "inference_time": 17.250617742538452, + "cpu_usage": 44.5, + "cpu_core_utilization": [ + 52.4, + 44.4, + 41.4, + 39.7 + ], + "request_id": "850d2e5c-0cec-493c-8ce6-c55f9b80e685" + }, + "sample1_rotated_270.mp4": { + "response": "In the video, a person is seen handling various items on a counter. Initially, they are arranging or selecting products from different categories displayed behind them. 
As time progresses, this individual appears to be interacting with another person who enters the frame and begins examining items laid out in front of the store counter. Throughout these actions, there seems to be no significant change in the arrangement of goods visible through the window-like partition behind the main counter. The focus remains primarily on the transactions happening at the counter area without notable alterations in the background setting or organization of merchandise.", + "tokens_per_second": 12.456803223684428, + "peak_gpu_memory_mb": 5852.375, + "num_generated_tokens": 115, + "inference_time": 11.520845413208008, + "cpu_usage": 28.2, + "cpu_core_utilization": [ + 7.4, + 39.4, + 19.4, + 46.8 + ], + "request_id": "60f24278-4a3e-40a5-b6ca-91d948dccefd" + }, + "sample1_raw.mp4": { + "response": "The video captures a sequence of events in a convenience store. Initially, an individual is seen interacting with items on the counter. As time progresses, another person enters the frame and engages with the first individual at the counter. The interaction appears to involve transactions or exchanges of goods. Throughout the clip, various products are visible on the shelves behind the counter. The camera angle remains fixed throughout the entire sequence, providing a comprehensive view of the activities occurring within the store.", + "tokens_per_second": 12.085148643257336, + "peak_gpu_memory_mb": 5852.375, + "num_generated_tokens": 95, + "inference_time": 10.628119230270386, + "cpu_usage": 31.5, + "cpu_core_utilization": [ + 35.8, + 14.0, + 22.2, + 54.1 + ], + "request_id": "69a97693-6053-4c05-946f-6feb3c853a6f" + }, + "sample1_rotated_180.mp4": { + "response": "In the video, a person wearing light blue is interacting with items on the counter. They appear to be handling or organizing something near the cash register area. Another individual in darker clothing enters the scene and approaches the first person. 
The interaction between them involves some form of exchange at the counter, likely involving transactions or inquiries about products displayed behind it.", + "tokens_per_second": 11.539003114652623, + "peak_gpu_memory_mb": 5852.375, + "num_generated_tokens": 70, + "inference_time": 8.36092495918274, + "cpu_usage": 28.5, + "cpu_core_utilization": [ + 7.8, + 59.5, + 26.3, + 20.3 + ], + "request_id": "79c77958-73b6-448a-b016-c5817b5973f1" + }, + "sample1_rotated_90.mp4": { + "response": "In the video, a person is seen browsing through items on display in what appears to be a convenience store. The individual examines various products laid out on shelves and tables. At one point, they bend down slightly as if getting a closer look at something or possibly picking up an item. Towards the end of the sequence, this same person interacts with another individual who seems to be either assisting them or engaging in conversation while standing nearby. Throughout the clip, different sections of the store are visible including neatly arranged merchandise and displays that feature multiple colors and types of goods.", + "tokens_per_second": 12.248593250808584, + "peak_gpu_memory_mb": 5852.375, + "num_generated_tokens": 115, + "inference_time": 11.675127983093262, + "cpu_usage": 29.9, + "cpu_core_utilization": [ + 23.1, + 18.2, + 14.5, + 63.9 + ], + "request_id": "702f19f1-1586-4d4d-8814-d3d4a9d67561" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_110454.json b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_110454.json new file mode 100644 index 0000000000000000000000000000000000000000..b0d6f14cef4385eccfb19e7fc8baea4b909a45b5 --- /dev/null +++ b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_110454.json @@ -0,0 +1,77 @@ +{ + "sample2.mp4": { + "response": "The video depicts a sequence of events at a convenience store counter. 
A customer in a light blue shirt interacts with the cashier, who is wearing black attire and handling transactions on the computer. Another individual dressed in dark clothing stands near the shelves stocked with various items. The timestamp indicates that these interactions occur over several seconds from 23:52 to 23:53 on January 1, 2025. Throughout the clips, different individuals enter or exit the frame, engaging with the environment around the counter area.", + "tokens_per_second": 9.96703162358834, + "peak_gpu_memory_mb": 7752.375, + "num_generated_tokens": 112, + "inference_time": 21.649349689483643, + "cpu_usage": 68.2, + "cpu_core_utilization": [ + 66.8, + 69.2, + 64.3, + 72.7 + ], + "request_id": "2a91f41b-eac8-4c88-9b5e-9707198412eb" + }, + "sample1_rotated_270.mp4": { + "response": "In the convenience store, a person is seen picking up items from a table and placing them into their backpack. Another individual in a blue shirt appears to be assisting or interacting with this process. The scene captures moments of interaction between these two individuals as they handle objects on the counter.", + "tokens_per_second": 9.183890722932835, + "peak_gpu_memory_mb": 7752.375, + "num_generated_tokens": 57, + "inference_time": 10.761897325515747, + "cpu_usage": 29.2, + "cpu_core_utilization": [ + 13.0, + 31.1, + 6.2, + 66.8 + ], + "request_id": "41eac123-16c5-4d05-acfd-9b7ecd52e1e1" + }, + "sample1_raw.mp4": { + "response": "The video captures a sequence of events inside a convenience store. It begins with an overhead view showing the interior layout, including shelves stocked with various items and a counter area where transactions occur. The timestamp on the footage indicates it is from Wednesday, May 3, 2025.\n\nIn the initial frames, there are no visible customers or staff engaging in any specific activities at the time-stamped 00:13:52. 
The scene remains static until approximately 00:14:10 when a person wearing a light blue shirt appears near the left side of the frame, seemingly interacting with something out of view.\n\nAs the video progresses to 00:14:16, this individual moves slightly closer to the camera, still engaged in their activity. By 00:14:22, another person dressed in dark clothing enters the frame from the upper part, walking towards the right side. This new arrival interacts briefly with the first individual before moving further into the background by 00:14:28.\n\nAt 00:14:32, attention shifts back to the transaction process as two individuals appear; one behind the counter likely performing duties while the other approaches for what seems like a purchase. The final moments captured at 00:14:40 show these two continuing their interaction at the counter. Throughout the clip, the environment stays consistent without significant changes apart from the movements and interactions described above.", + "tokens_per_second": 12.75781937284626, + "peak_gpu_memory_mb": 7762.375, + "num_generated_tokens": 312, + "inference_time": 30.210939168930054, + "cpu_usage": 30.8, + "cpu_core_utilization": [ + 13.1, + 40.8, + 59.1, + 9.9 + ], + "request_id": "671f2493-4cbc-4854-96ac-3711a296d91d" + }, + "sample1_rotated_180.mp4": { + "response": "The video captures a sequence of events inside a convenience store. It begins with an empty counter and shelves stocked with various items, including boxes and small merchandise. A person wearing a blue shirt is seated at the counter on the right side of the frame. Another individual in a light-colored top enters from the left and sits down next to the first person. The two engage in conversation while one of them handles some paperwork or documents on the counter. 
Throughout the interaction, there are no significant changes to the environment or arrangement of items on the shelves.", + "tokens_per_second": 11.150383906582048, + "peak_gpu_memory_mb": 7762.375, + "num_generated_tokens": 111, + "inference_time": 14.525296211242676, + "cpu_usage": 28.4, + "cpu_core_utilization": [ + 43.1, + 5.1, + 32.5, + 32.6 + ], + "request_id": "a2c33b38-df7f-4f96-ac5f-85d8c75507ee" + }, + "sample1_rotated_90.mp4": { + "response": "In the video, a person wearing light blue clothing is seen moving around and interacting with items on a counter. There are various objects spread out on the shelves behind them. The individual appears to be organizing or handling different products displayed in an orderly manner.", + "tokens_per_second": 8.76210837152812, + "peak_gpu_memory_mb": 7762.375, + "num_generated_tokens": 52, + "inference_time": 10.507242918014526, + "cpu_usage": 29.3, + "cpu_core_utilization": [ + 7.8, + 96.6, + 5.7, + 6.4 + ], + "request_id": "a590da60-1f05-494d-b772-a95b92185462" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_110635.json b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_110635.json new file mode 100644 index 0000000000000000000000000000000000000000..b28341ac370564203b6cc82266b2af7389ccc394 --- /dev/null +++ b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_110635.json @@ -0,0 +1,77 @@ +{ + "sample2.mp4": { + "response": "The video depicts a sequence of events at a convenience store checkout counter. Initially, two individuals are present: one behind the counter and another standing in line with items to purchase. As the video progresses, various customers enter the scene and interact with the cashier. The interactions involve placing or picking up items, making payments, and waiting their turn. 
Throughout the clip, there is consistent activity around the counter area as different people come and go.", + "tokens_per_second": 8.219991139097228, + "peak_gpu_memory_mb": 9422.375, + "num_generated_tokens": 90, + "inference_time": 27.526339530944824, + "cpu_usage": 54.8, + "cpu_core_utilization": [ + 47.4, + 63.2, + 44.9, + 63.6 + ], + "request_id": "9c79b8e0-c117-48ce-87d7-0f69fe4b813f" + }, + "sample1_rotated_270.mp4": { + "response": "People are shopping at a convenience store. A man is browsing through the shelves and picking up items to put them in his bag. He puts on a hat, then takes off the hat again and continues looking for more things to buy. Another person walks by carrying some bags.", + "tokens_per_second": 6.727455931746866, + "peak_gpu_memory_mb": 9422.375, + "num_generated_tokens": 56, + "inference_time": 17.57825207710266, + "cpu_usage": 29.4, + "cpu_core_utilization": [ + 36.6, + 7.3, + 68.2, + 5.1 + ], + "request_id": "930de0f2-3168-402b-bf54-ae4b35e66d88" + }, + "sample1_raw.mp4": { + "response": "In the convenience store, a customer is seen selecting items from shelves. A security guard enters and surveys the scene while holding a notepad. The camera captures different angles of the interaction between the security guard and the cashier as they process the transaction.", + "tokens_per_second": 6.369975503038201, + "peak_gpu_memory_mb": 9422.375, + "num_generated_tokens": 52, + "inference_time": 19.862124919891357, + "cpu_usage": 38.2, + "cpu_core_utilization": [ + 37.0, + 40.9, + 21.5, + 53.7 + ], + "request_id": "94b78871-ffda-4079-8563-cba6458604e0" + }, + "sample1_rotated_180.mp4": { + "response": "People walk in and out of frame. A man sits at the counter, picks up a box and puts it back down. He points to something on the shelf behind him. 
Another person enters the scene and walks past the camera as he talks with the seated man.", + "tokens_per_second": 6.581258041607729, + "peak_gpu_memory_mb": 9422.375, + "num_generated_tokens": 54, + "inference_time": 17.512973070144653, + "cpu_usage": 29.6, + "cpu_core_utilization": [ + 27.6, + 18.4, + 31.1, + 41.1 + ], + "request_id": "941b1262-a196-4fea-80d5-3190ee638fe5" + }, + "sample1_rotated_90.mp4": { + "response": "A person wearing a white shirt and blue shorts is sitting on the floor. There are many hats displayed in rows, as well as other items such as sunglasses and books. The man picks up an item from the display case then sits back down to continue organizing things.", + "tokens_per_second": 6.564398995813938, + "peak_gpu_memory_mb": 9422.375, + "num_generated_tokens": 55, + "inference_time": 17.651288747787476, + "cpu_usage": 29.3, + "cpu_core_utilization": [ + 6.9, + 23.7, + 5.5, + 80.6 + ], + "request_id": "3f721ab9-2e0c-4b0e-a760-9ded5da8519b" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_110827.json b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_110827.json new file mode 100644 index 0000000000000000000000000000000000000000..821a3b50b28fbf2d2d583419eec58dcb4cef223b --- /dev/null +++ b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_110827.json @@ -0,0 +1,77 @@ +{ + "sample2.mp4": { + "response": "Customers approach the counter, place their orders at the register, pay for their items, and then leave with their purchases. 
The cashier rings up each purchase using a scanner or point-of-sale system, processes payments from customers' cards, bags the customer's purchased goods in plastic bags, weighs them on a scale if necessary, checks prices to ensure accuracy, scans barcodes, and packs products into clear plastic bags.", + "tokens_per_second": 6.54134801287868, + "peak_gpu_memory_mb": 9354.375, + "num_generated_tokens": 87, + "inference_time": 35.413445472717285, + "cpu_usage": 60.7, + "cpu_core_utilization": [ + 50.1, + 53.8, + 54.9, + 84.1 + ], + "request_id": "08a603cf-ef2a-43b7-8a57-51d776507de1" + }, + "sample1_rotated_270.mp4": { + "response": "Customer enters the frame and sits at the counter.\nCustomer puts on a hat.\nCustomer picks up an item, examines it closely for about ten seconds.\nCustomer places the item back down.\nCustomer reaches out to grab something from behind the customer service window.\nCustomer hands money over to employee who then counts it and returns change.\nEmployee looks in box under the counter, grabs item, passes it across counter to customer.", + "tokens_per_second": 6.748738122493609, + "peak_gpu_memory_mb": 9354.375, + "num_generated_tokens": 90, + "inference_time": 27.117986917495728, + "cpu_usage": 29.2, + "cpu_core_utilization": [ + 21.0, + 23.0, + 51.7, + 20.9 + ], + "request_id": "10420b35-ed99-4649-95f4-1c15ec853a25" + }, + "sample1_raw.mp4": { + "response": "In this convenience store, a man wearing blue clothes is standing in the middle of the counter. He has his hand on something and appears to be talking with him. The customer's arm extends out from behind some items at the front of the counter. A bald-headed security guard enters frame left holding up a cell phone and then looks around as he walks towards the right side of the screen. Another person comes into view from the top edge of the image and stands near the back wall for a moment before walking toward the center of the scene. 
Finally, two more people enter frame left; one bends down slightly while reaching forward and placing an item onto the counter.", + "tokens_per_second": 8.176941134255504, + "peak_gpu_memory_mb": 9354.375, + "num_generated_tokens": 137, + "inference_time": 34.64553380012512, + "cpu_usage": 36.7, + "cpu_core_utilization": [ + 44.8, + 58.3, + 26.0, + 17.8 + ], + "request_id": "f7bceeb1-1e06-49db-becd-f87ec9ca6f0c" + }, + "sample1_rotated_180.mp4": { + "response": "Customer comes in, looks around.\nEmployee talks to customer while looking at register. Customer checks out with employee and leaves store.", + "tokens_per_second": 3.0094393528369956, + "peak_gpu_memory_mb": 9354.375, + "num_generated_tokens": 27, + "inference_time": 22.807881832122803, + "cpu_usage": 29.6, + "cpu_core_utilization": [ + 38.7, + 14.6, + 58.5, + 6.4 + ], + "request_id": "bc80fbca-3475-45fb-87f2-3004d7fa9ea6" + }, + "sample1_rotated_90.mp4": { + "response": "A person is sorting through a pile of clothing items.", + "tokens_per_second": 1.520139023080047, + "peak_gpu_memory_mb": 9354.375, + "num_generated_tokens": 12, + "inference_time": 21.718321084976196, + "cpu_usage": 30.3, + "cpu_core_utilization": [ + 20.7, + 12.5, + 32.2, + 55.6 + ], + "request_id": "77c278a9-142b-4240-91e0-72eeba53fdd8" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_111102.json b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_111102.json new file mode 100644 index 0000000000000000000000000000000000000000..1651f27766f1e4335a3b4ef34f792c1868884026 --- /dev/null +++ b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_111102.json @@ -0,0 +1,77 @@ +{ + "sample2.mp4": { + "response": "Customers queue up at the counter to purchase items. The cashier rings them up and hands over their purchases. 
Some customers exit, while others enter the store.", + "tokens_per_second": 2.5171303106196135, + "peak_gpu_memory_mb": 11526.375, + "num_generated_tokens": 33, + "inference_time": 40.74959397315979, + "cpu_usage": 77.6, + "cpu_core_utilization": [ + 71.4, + 79.1, + 84.1, + 75.8 + ], + "request_id": "897f9022-0d69-4ab8-b312-e70bff671e01" + }, + "sample1_rotated_270.mp4": { + "response": "Customer enters store, looks around. Employee picks up item from counter and gives it to customer. Customer leaves with the item. Employee walks over to another part of the shelf and begins rearranging items on display there.", + "tokens_per_second": 3.2227324826478276, + "peak_gpu_memory_mb": 11526.375, + "num_generated_tokens": 44, + "inference_time": 32.98369121551514, + "cpu_usage": 37.1, + "cpu_core_utilization": [ + 27.5, + 27.7, + 40.9, + 52.2 + ], + "request_id": "d2ac705f-4d82-47f2-a0d0-c0fdb814baca" + }, + "sample1_raw.mp4": { + "response": "The video shows a convenience store with various items on the shelves. A man in blue is standing behind the counter, and he interacts with different people who come into the shop to make purchases or ask questions about products. The footage captures their interactions as they approach the counter and engage with the staff member.", + "tokens_per_second": 4.038851709599647, + "peak_gpu_memory_mb": 11526.375, + "num_generated_tokens": 61, + "inference_time": 38.78435945510864, + "cpu_usage": 37.8, + "cpu_core_utilization": [ + 22.1, + 64.5, + 18.3, + 46.1 + ], + "request_id": "af12b757-3a79-42ce-930b-8a500462135b" + }, + "sample1_rotated_180.mp4": { + "response": "Customer walks into store. Employee takes inventory of customer s purchases and scans items with a scanner. 
Customer pays for his or her merchandise at the cash register, employee processes payment, hands money to customer.", + "tokens_per_second": 2.9283473152290984, + "peak_gpu_memory_mb": 11526.375, + "num_generated_tokens": 41, + "inference_time": 32.499228954315186, + "cpu_usage": 51.3, + "cpu_core_utilization": [ + 48.0, + 38.8, + 78.1, + 40.1 + ], + "request_id": "9491609f-1203-42a7-ba7d-d1b6dddc9afc" + }, + "sample1_rotated_90.mp4": { + "response": "In this clip, a man is seen walking into the frame and sitting on the floor. He then picks up an item from one of the shelves behind him before placing it back down again. Afterwards, he grabs another object off the shelf and places it next to the first item. Finally, he takes yet another item out of the display case and sets it beside the other two objects.", + "tokens_per_second": 4.766323941506251, + "peak_gpu_memory_mb": 11526.375, + "num_generated_tokens": 80, + "inference_time": 35.715086221694946, + "cpu_usage": 43.7, + "cpu_core_utilization": [ + 39.6, + 40.6, + 64.6, + 29.9 + ], + "request_id": "2774fd16-e3bd-4beb-bfd1-c13ed84ba843" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_111417.json b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_111417.json new file mode 100644 index 0000000000000000000000000000000000000000..6cd939bdd7460cd0e7a2d3c91289108859ace87e --- /dev/null +++ b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_111417.json @@ -0,0 +1,77 @@ +{ + "sample2.mp4": { + "response": "Customers visit a convenience store and pay for their items. A man wearing light blue pushes the counter with his left hand while he is paying at the cash register. The customer in black clothes receives something from another person, who then leaves. 
The woman in beige walks forward slowly towards the right side of the frame as other customers enter the scene one by one.", + "tokens_per_second": 3.636909184972061, + "peak_gpu_memory_mb": 12718.375, + "num_generated_tokens": 74, + "inference_time": 54.29106378555298, + "cpu_usage": 52.7, + "cpu_core_utilization": [ + 46.7, + 54.8, + 68.7, + 40.6 + ], + "request_id": "77df7ba2-e74c-403e-adf9-6849dcd7550d" + }, + "sample1_rotated_270.mp4": { + "response": "Customer enters the convenience store and proceeds to a counter. Customer sits on floor next to counter, puts hand in pocket, reaches for something under cash register. Cashier retrieves item from behind counter, hands it over to customer. Customer stands up, takes item.", + "tokens_per_second": 2.947881774503077, + "peak_gpu_memory_mb": 12718.375, + "num_generated_tokens": 54, + "inference_time": 41.44351029396057, + "cpu_usage": 30.3, + "cpu_core_utilization": [ + 68.7, + 22.6, + 21.7, + 8.2 + ], + "request_id": "825ed41a-2804-41d9-882c-3d8381ec27ce" + }, + "sample1_raw.mp4": { + "response": "Visible in the video are a man and woman standing behind the counter of a convenience store. The shelves on both sides contain various items such as snacks, drinks, and other products commonly found in convenience stores. Throughout the footage, there is no significant change to the arrangement or appearance of these objects.\nThe man appears to be taking pictures with his mobile phone throughout most of the clip. He then proceeds to scan an item using his device before handing it over to the female cashier who is wearing blue attire. After receiving the product from him, she scans its barcode and places it into her bag. 
This process repeats itself several times until he finishes scanning all remaining items for sale.", + "tokens_per_second": 5.257369693058746, + "peak_gpu_memory_mb": 12718.375, + "num_generated_tokens": 141, + "inference_time": 56.382145404815674, + "cpu_usage": 36.4, + "cpu_core_utilization": [ + 60.8, + 27.2, + 40.1, + 17.2 + ], + "request_id": "457f524d-183d-498a-bddb-3bf921791a74" + }, + "sample1_rotated_180.mp4": { + "response": "The video begins with a convenience store camera angle. There are no people in the frame, and it is empty for some time until a man enters from right of the frame to sit down at the counter. He sits there while another person comes into view walking towards him on his left side. The second individual walks around behind the first sitting man before reaching out and grabbing something near his face. Then he puts that object back onto the table. The standing man then reaches up toward the ceiling and pulls an item off of the shelf above their heads. Afterwards, he grabs two items from the shelves next to them. Both men take those objects to place them inside the previously grabbed object they had been holding. They both continue placing these new items together as the other stands beside them watching.", + "tokens_per_second": 5.539234909838161, + "peak_gpu_memory_mb": 12718.375, + "num_generated_tokens": 160, + "inference_time": 52.02853178977966, + "cpu_usage": 29.2, + "cpu_core_utilization": [ + 41.4, + 8.4, + 58.5, + 8.4 + ], + "request_id": "110b198a-4fb3-4168-a92c-4a1afe6ed016" + }, + "sample1_rotated_90.mp4": { + "response": "In the video, a man is seen wearing blue shorts and a white t-shirt. He enters from frame left with some items in his hand. The shelves are filled with various products including hats of different colors such as red, yellow, pink, grey, black, etc. There is also an umbrella on one side of the shelf which has a picture printed onto it that looks like a shark fin. 
As he walks forward towards the back of the shop, he bends down to pick up something off the floor before standing straight again and reaching out for another item near him.", + "tokens_per_second": 4.7551999525578434, + "peak_gpu_memory_mb": 12718.375, + "num_generated_tokens": 117, + "inference_time": 47.791791915893555, + "cpu_usage": 28.7, + "cpu_core_utilization": [ + 6.5, + 9.0, + 54.5, + 44.7 + ], + "request_id": "7b55f145-f8fa-4977-bedd-c078760b3c56" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_111842.json b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_111842.json new file mode 100644 index 0000000000000000000000000000000000000000..7c486202d2c792f6ff286a049b80cf4da520bcf0 --- /dev/null +++ b/Direct_Transformers/outputs/MiniCPM-V-4-int4/20250822_111842.json @@ -0,0 +1,77 @@ +{ + "sample2.mp4": { + "response": "Two men are at the counter of a convenience store. The man behind the counter scans items and processes payments for customers who come up to him with their purchases. Customers place items on the counter, hand over money or cards, receive change, and then leave by walking towards the shelves in the back of the store.", + "tokens_per_second": 2.62416869979478, + "peak_gpu_memory_mb": 14174.375, + "num_generated_tokens": 64, + "inference_time": 64.16653180122375, + "cpu_usage": 67.6, + "cpu_core_utilization": [ + 67.3, + 63.0, + 71.0, + 68.9 + ], + "request_id": "7775eecd-8936-4c41-86d5-7f025a398a9b" + }, + "sample1_rotated_270.mp4": { + "response": "The video shows a man in blue shirt and hat entering the convenience store. He is holding something in his hand, which he places on the counter of the cash register. The man then lies down across the cash register area with his head resting on the table. 
After a moment, another person wearing a white t-shirt enters the scene and starts to pick up items from under the lying man's body.", + "tokens_per_second": 3.1184040645544244, + "peak_gpu_memory_mb": 14174.375, + "num_generated_tokens": 83, + "inference_time": 54.36964297294617, + "cpu_usage": 29.1, + "cpu_core_utilization": [ + 28.7, + 10.4, + 71.4, + 5.8 + ], + "request_id": "90697562-2d13-486b-a271-d402cbd73e1c" + }, + "sample1_raw.mp4": { + "response": "A man in a blue shirt is standing behind the counter of a convenience store. He is talking to another person who has their hand on his arm and whose head can be seen inside the frame. The other person walks around, then sits down at the end of the counter. They look up at something as he points it towards them. A bald man wearing a white shirt appears from left frame, holding some kind of device. He looks through the camera with the item that the seated individual was pointing before looking back at the screen again.", + "tokens_per_second": 3.6808149496535814, + "peak_gpu_memory_mb": 14174.375, + "num_generated_tokens": 109, + "inference_time": 65.01349687576294, + "cpu_usage": 36.5, + "cpu_core_utilization": [ + 32.9, + 55.4, + 27.5, + 30.3 + ], + "request_id": "80d4975d-d162-40b0-8ef9-c0a8ca1a243d" + }, + "sample1_rotated_180.mp4": { + "response": "Customer enters the convenience store and browses through items on display. Employee hands customer an item, then proceeds to search for a bill. Customer picks up money from his wallet while employee continues searching bills. 
After finding both bills, employee scans barcode of purchased item using scanner device before handing it over to customer who takes receipt.", + "tokens_per_second": 2.722827822424792, + "peak_gpu_memory_mb": 14174.375, + "num_generated_tokens": 68, + "inference_time": 52.909793853759766, + "cpu_usage": 29.6, + "cpu_core_utilization": [ + 23.8, + 25.4, + 61.5, + 7.8 + ], + "request_id": "d67151ce-541f-48c0-9c81-232f5b638a15" + }, + "sample1_rotated_90.mp4": { + "response": "The video begins with a person sitting on the floor, facing left. They pick up an item from the shelf and place it in their bag. Then they stand up to walk away. A second individual enters frame right; this time they bend down to inspect items on the lower shelves before standing back up to continue searching for more products.", + "tokens_per_second": 2.721625615443428, + "peak_gpu_memory_mb": 14174.375, + "num_generated_tokens": 68, + "inference_time": 52.78507661819458, + "cpu_usage": 29.5, + "cpu_core_utilization": [ + 31.5, + 7.9, + 72.7, + 5.5 + ], + "request_id": "81ffc9d3-fb6c-4ad8-94f4-ac4eb142187d" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/MiniCPM-V-4/20250822_040510.json b/Direct_Transformers/outputs/MiniCPM-V-4/20250822_040510.json new file mode 100644 index 0000000000000000000000000000000000000000..0ee3463012e9a8d53cd899fe536961e4d419bbda --- /dev/null +++ b/Direct_Transformers/outputs/MiniCPM-V-4/20250822_040510.json @@ -0,0 +1,32 @@ +{ + "sample1_raw.mp4": { + "response": "The video captures a sequence of events inside a convenience store. Initially, the scene is static with no visible activity. Shortly afterward, an individual enters from the left side and approaches the counter area where another person stands behind it. This second person appears to be engaging in some form of transaction or interaction with the customer. Throughout the clip, there are slight movements indicating ongoing activities at the counter. 
The timestamp on the images shows progression over time, starting around 08:13 and advancing to 08:14, suggesting that these interactions occur within a brief period.", + "tokens_per_second": 7.668726473028378, + "peak_gpu_memory_mb": 13140.375, + "num_generated_tokens": 119, + "inference_time": 37.55763649940491, + "cpu_usage": 19.3, + "cpu_core_utilization": [ + 22.7, + 15.6, + 20.2, + 18.5 + ], + "request_id": "83e6ecab-4aa8-4145-a723-df97c6a534ff" + }, + "sample1_rotated.mp4": { + "response": "A man enters a store with several bags of merchandise in his hand. He walks to the counter, places the items on it, and then sits down next to them. The man begins examining or organizing the products while seated at the counter.", + "tokens_per_second": 4.763776873285316, + "peak_gpu_memory_mb": 13140.375, + "num_generated_tokens": 49, + "inference_time": 23.988155364990234, + "cpu_usage": 29.7, + "cpu_core_utilization": [ + 27.6, + 33.2, + 34.1, + 24.0 + ], + "request_id": "1cfb935b-5c0c-4f97-a288-7f0436b30c26" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/MiniCPM-V-4/20250822_044625.json b/Direct_Transformers/outputs/MiniCPM-V-4/20250822_044625.json new file mode 100644 index 0000000000000000000000000000000000000000..0e9f3ea156fbb395af0e50392b51cbc5271f35ca --- /dev/null +++ b/Direct_Transformers/outputs/MiniCPM-V-4/20250822_044625.json @@ -0,0 +1,62 @@ +{ + "sample1_rotated_270.mp4": { + "response": "The video shows a convenience store with various items displayed on shelves. A man wearing a hat and light blue shirt is seen interacting with the counter, possibly making a purchase or checking out. Another person in a blue shirt appears behind the counter, engaging with the first man, likely assisting him with his transaction. 
The background remains consistent throughout the sequence, focusing on the display of goods and the interaction between the two individuals at the checkout area.", + "tokens_per_second": 6.61058625835899, + "peak_gpu_memory_mb": 13140.375, + "num_generated_tokens": 88, + "inference_time": 31.981347799301147, + "cpu_usage": 24.9, + "cpu_core_utilization": [ + 25.0, + 30.3, + 24.8, + 19.4 + ], + "request_id": "0dc2ca5b-e666-4795-b0e6-52bbb7232e2f" + }, + "sample1_raw.mp4": { + "response": "The video captures a sequence of events inside a convenience store. Initially, the scene is static with various items displayed on shelves and no visible human activity apart from some minor movements in the background. As time progresses, an individual enters the frame, walking towards the counter area. This person appears to be interacting with someone behind the counter, possibly making a purchase or inquiry. The interaction continues for several seconds with minimal changes in positioning between the customer and the staff member at the counter. Towards the end of the observed segment, another individual dressed in blue clothing joins the scene near the entrance of the store, standing still while observing the ongoing transaction. Throughout the clip, there are consistent timestamps indicating the progression of time within the captured footage.", + "tokens_per_second": 8.564189167013154, + "peak_gpu_memory_mb": 13140.375, + "num_generated_tokens": 149, + "inference_time": 35.383443117141724, + "cpu_usage": 46.0, + "cpu_core_utilization": [ + 29.3, + 30.8, + 36.2, + 87.5 + ], + "request_id": "e76115d1-7380-497c-8cfc-58697760dadb" + }, + "sample1_rotated_180.mp4": { + "response": "A man in a white shirt and black pants walks into the frame. He sits down on an office chair next to a counter with shelves of merchandise behind it. A person wearing blue enters from the right side, approaches the seated individual, and extends their hand for a handshake. 
The two shake hands briefly before the standing person begins handling items at the cash register. Another customer dressed in dark clothing is visible sitting further back near the entrance area of the store.", + "tokens_per_second": 6.871595189671803, + "peak_gpu_memory_mb": 13140.375, + "num_generated_tokens": 93, + "inference_time": 27.674049377441406, + "cpu_usage": 49.1, + "cpu_core_utilization": [ + 36.7, + 43.2, + 46.6, + 69.9 + ], + "request_id": "03891848-b72e-411b-a6c0-12d7a1feeec6" + }, + "sample1_rotated_90.mp4": { + "response": "The video shows a man in a convenience store setting. He is wearing a light blue shirt and dark pants, with a black bag slung over his shoulder. Initially, he stands near the counter, then moves towards an area displaying various items such as hats and possibly other accessories or small goods for sale. The focus of his activity appears to be inspecting these products closely, perhaps considering whether to purchase them. Throughout the sequence, there are no significant changes in the environment or additional people entering the frame that draw attention away from him.", + "tokens_per_second": 7.413903967514163, + "peak_gpu_memory_mb": 13140.375, + "num_generated_tokens": 108, + "inference_time": 29.009718894958496, + "cpu_usage": 53.5, + "cpu_core_utilization": [ + 52.8, + 38.9, + 74.9, + 47.2 + ], + "request_id": "ee171f56-31f3-44aa-ab53-8278de0cf252" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/MiniCPM-V-4/20250822_054230.json b/Direct_Transformers/outputs/MiniCPM-V-4/20250822_054230.json new file mode 100644 index 0000000000000000000000000000000000000000..0367552f33ce42eb637e894ab0837b66dccd7c5b --- /dev/null +++ b/Direct_Transformers/outputs/MiniCPM-V-4/20250822_054230.json @@ -0,0 +1,62 @@ +{ + "sample1_rotated_270.mp4": { + "response": "The video depicts a convenience store scene where an employee is handling various tasks. 
Initially, the employee is seated behind the counter with their hands on it, possibly taking a break or preparing to assist customers. Shortly after, they stand up and begin interacting with items on the counter, likely organizing or checking products. Throughout the sequence, there are no significant changes in the environment or introduction of new objects; the focus remains on the actions of the individual staff member at the checkout area.", + "tokens_per_second": 6.97994619995469, + "peak_gpu_memory_mb": 13140.375, + "num_generated_tokens": 97, + "inference_time": 31.93311309814453, + "cpu_usage": 14.5, + "cpu_core_utilization": [ + 19.4, + 9.9, + 20.4, + 8.5 + ], + "request_id": "dcfe8de7-58f0-4162-8d7b-a513c00e893c" + }, + "sample1_raw.mp4": { + "response": "The video shows a convenience store with various items on shelves and displays. A man in light blue clothing enters the scene, approaches the counter, interacts with an employee behind it, appears to make a payment or transaction using a card reader, and then exits the frame towards the left side of the image. Another person wearing dark-colored clothing briefly walks into the frame from the top right corner before moving out of view. The main focus is on the interaction between the customer and the cashier at the checkout area.", + "tokens_per_second": 7.346576015565981, + "peak_gpu_memory_mb": 13140.375, + "num_generated_tokens": 104, + "inference_time": 32.25017738342285, + "cpu_usage": 38.6, + "cpu_core_utilization": [ + 21.7, + 37.1, + 52.7, + 42.8 + ], + "request_id": "9688bb31-4a5b-4174-a5d3-b655a94be714" + }, + "sample1_rotated_180.mp4": { + "response": "The video begins with a top-down view of a convenience store shelf stocked with various items. A person wearing light blue clothing enters the frame and sits at the counter, interacting with something on it. Shortly after, another individual dressed in white approaches from the right side of the screen, also sitting down near the counter. 
The two individuals engage in conversation or some form of interaction while seated.", + "tokens_per_second": 6.41842237886657, + "peak_gpu_memory_mb": 13140.375, + "num_generated_tokens": 81, + "inference_time": 26.347336769104004, + "cpu_usage": 30.3, + "cpu_core_utilization": [ + 33.7, + 27.4, + 14.8, + 45.4 + ], + "request_id": "f8b48685-3207-4cb6-acd1-a6cc9c6dbf99" + }, + "sample1_rotated_90.mp4": { + "response": "A person in a blue shirt walks past the counter. A man wearing white clothes is seen kneeling on the floor, placing something into a display case near the store's entrance.", + "tokens_per_second": 3.94583234151262, + "peak_gpu_memory_mb": 13140.375, + "num_generated_tokens": 38, + "inference_time": 23.4061017036438, + "cpu_usage": 30.0, + "cpu_core_utilization": [ + 48.4, + 15.8, + 22.9, + 32.8 + ], + "request_id": "efd73a2a-1666-4017-a20b-b13c69acc01c" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/MiniCPM-V-4/20250822_055004.json b/Direct_Transformers/outputs/MiniCPM-V-4/20250822_055004.json new file mode 100644 index 0000000000000000000000000000000000000000..3d018e81a76430962f61a47e28a9640606c36f53 --- /dev/null +++ b/Direct_Transformers/outputs/MiniCPM-V-4/20250822_055004.json @@ -0,0 +1,77 @@ +{ + "sample2.mp4": { + "response": "In the convenience store, a person in black attire is interacting with another individual at the counter. The scene begins with various shoppers moving around the store; one wearing light blue and reaching for an item on the shelf while others pass by or wait near the shelves stocked with snacks. Another shopper dressed in dark clothing appears briefly before exiting to the right side of the frame. Throughout the sequence, the customer in black seems focused on handling transactions or inquiries at the counter, occasionally looking down at documents or items on the counter. Meanwhile, the person in light blue continues their search among the products displayed behind the counter. 
Other individuals are seen entering from different directions: a person in beige walks past towards the back of the store, followed by someone in blue who moves across the left side of the frame. These movements suggest normal shopping activities within the store environment as captured by the surveillance camera.", + "tokens_per_second": 9.17590678416416, + "peak_gpu_memory_mb": 13140.375, + "num_generated_tokens": 181, + "inference_time": 39.73766207695007, + "cpu_usage": 28.0, + "cpu_core_utilization": [ + 18.8, + 19.5, + 32.4, + 41.2 + ], + "request_id": "62f6a61f-a889-4e9b-8a2b-6951dc9722c3" + }, + "sample1_rotated_270.mp4": { + "response": "The video captures a sequence of events inside a convenience store. Initially, the counter is empty with various items neatly arranged on it and shelves behind stocked with goods. A man enters from the left side, walks towards the counter, sits down, and appears to be waiting or resting his head on his hand. Shortly afterward, another person in blue clothing approaches the counter, seemingly interacting with the first individual by placing something on the counter. The interaction between these two individuals continues as they engage in what looks like an exchange or transaction involving some papers or documents placed on the counter. Throughout this process, there are no significant changes in the arrangement of products on the shelves or the overall setup of the store's interior.", + "tokens_per_second": 8.507203392291894, + "peak_gpu_memory_mb": 13140.375, + "num_generated_tokens": 147, + "inference_time": 30.908205032348633, + "cpu_usage": 29.6, + "cpu_core_utilization": [ + 29.5, + 11.7, + 16.7, + 60.3 + ], + "request_id": "b9fca7fa-fde3-4586-8012-bdd9dc186b9a" + }, + "sample1_raw.mp4": { + "response": "The video captures a sequence of events inside a convenience store. It begins with an empty counter and shelves stocked with various items. 
A person wearing a blue shirt enters the scene, stands near the counter, then moves towards the right side of the frame where another individual in dark clothing is present. The two individuals engage in conversation at the far end of the store. Shortly afterward, they return to the counter area, and the interaction between them appears more focused on transactions or discussions related to the products displayed behind the counter. Throughout this process, no significant changes occur in the environment or the arrangement of objects within the store.", + "tokens_per_second": 7.992290454420001, + "peak_gpu_memory_mb": 13140.375, + "num_generated_tokens": 127, + "inference_time": 33.75596356391907, + "cpu_usage": 37.5, + "cpu_core_utilization": [ + 31.0, + 24.7, + 59.5, + 34.9 + ], + "request_id": "78948d9e-12b2-41b6-8686-6132206bad53" + }, + "sample1_rotated_180.mp4": { + "response": "A man enters the frame, walks past a counter with various items on it and stops in front of shelves stocked with hats. He sits down at the counter facing the camera. Another person wearing blue clothing approaches from behind the counter to engage with him. Both individuals interact near the counter area while one remains seated.", + "tokens_per_second": 5.582476898169938, + "peak_gpu_memory_mb": 13140.375, + "num_generated_tokens": 64, + "inference_time": 25.217974185943604, + "cpu_usage": 29.9, + "cpu_core_utilization": [ + 41.2, + 8.1, + 62.4, + 7.4 + ], + "request_id": "46b4eb7a-5eef-471f-8310-3b63c7cc08c8" + }, + "sample1_rotated_90.mp4": { + "response": "A man wearing a white shirt and blue shorts is seen inspecting items on display in a convenience store. 
The video captures his movements as he bends over the counter, seemingly examining or organizing products placed there.", + "tokens_per_second": 4.288594229547833, + "peak_gpu_memory_mb": 13140.375, + "num_generated_tokens": 43, + "inference_time": 23.7890465259552, + "cpu_usage": 30.4, + "cpu_core_utilization": [ + 48.7, + 44.8, + 13.8, + 14.2 + ], + "request_id": "7c1bd69f-ec68-4699-b7ae-d253fc203819" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/Qwen2-VL-2B-Instruct-AWQ/20250822_055548.json b/Direct_Transformers/outputs/Qwen2-VL-2B-Instruct-AWQ/20250822_055548.json new file mode 100644 index 0000000000000000000000000000000000000000..484d6861b52c9faf1452545c0be4a0b0b61a2b9e --- /dev/null +++ b/Direct_Transformers/outputs/Qwen2-VL-2B-Instruct-AWQ/20250822_055548.json @@ -0,0 +1,77 @@ +{ + "sample2.mp4": { + "response": "The video captures a sequence of events in a convenience store. Initially, a man in a white shirt is seen standing at the counter, interacting with another man. A third man, dressed in black, walks into the frame and interacts with the counter. The man in the white shirt then moves to the counter, and the third man, dressed in black, walks away. The man in the white shirt continues to interact with the counter, while the man in black walks away. The video shows a brief interaction between the two men at the counter, followed by the arrival of a third man and their subsequent movement.", + "tokens_per_second": 20.134168800564364, + "peak_gpu_memory_mb": 5842.375, + "num_generated_tokens": 122, + "inference_time": 12.46465802192688, + "cpu_usage": 19.4, + "cpu_core_utilization": [ + 12.0, + 20.5, + 15.0, + 30.1 + ], + "request_id": "dc39e294-469d-421f-bfa2-1bf3d91c18a4" + }, + "sample1_rotated_270.mp4": { + "response": "The video captures a moment in a convenience store where a man is interacting with another person. 
The man, wearing a blue shirt, is standing near the counter, while the other person, also in a blue shirt, is seated at the counter. The man appears to be speaking to the seated person, possibly engaging in a conversation or transaction. The setting is a typical convenience store with shelves stocked with various products. The man's actions suggest he might be providing assistance or discussing something with the seated person. The video does not show any other significant events or interactions in the store.", + "tokens_per_second": 20.431106571476665, + "peak_gpu_memory_mb": 5842.375, + "num_generated_tokens": 115, + "inference_time": 8.503605842590332, + "cpu_usage": 34.1, + "cpu_core_utilization": [ + 12.1, + 22.3, + 14.3, + 87.0 + ], + "request_id": "8235bab7-dc62-460f-ada4-6de957321cc5" + }, + "sample1_raw.mp4": { + "response": "The video captures a busy convenience store scene with several people engaged in various activities. A man is seen walking through the store, possibly looking for items or checking the shelves. Another man is standing near the counter, possibly interacting with the cashier or another customer. The cashier is busy attending to the customer, handling transactions and providing assistance. The store is well-stocked with various products, including snacks and beverages, displayed on shelves and tables. 
The overall atmosphere suggests a typical day at a convenience store with customers and employees working together to serve and support each other.", + "tokens_per_second": 20.501854828620406, + "peak_gpu_memory_mb": 5842.375, + "num_generated_tokens": 111, + "inference_time": 12.467046022415161, + "cpu_usage": 53.8, + "cpu_core_utilization": [ + 67.7, + 66.0, + 39.3, + 42.3 + ], + "request_id": "dd6002c8-8398-4d52-8fa4-4fd187b6b7d0" + }, + "sample1_rotated_180.mp4": { + "response": "a man(607,295),(747,592)", + "tokens_per_second": 17.02981294121544, + "peak_gpu_memory_mb": 5842.375, + "num_generated_tokens": 19, + "inference_time": 3.9945120811462402, + "cpu_usage": 39.7, + "cpu_core_utilization": [ + 21.0, + 25.6, + 22.5, + 89.7 + ], + "request_id": "53d433da-8e89-473b-8094-b3749860494d" + }, + "sample1_rotated_90.mp4": { + "response": "The video captures a person walking through a convenience store, moving around the shelves and interacting with items. The person is seen walking past the shelves, picking up items, and placing them back down. The person is wearing a light-colored shirt and appears to be engaged in shopping. The shelves are filled with various items, including food and beverages, and the person is seen walking past them. 
The video does not show any other people or activities in the store.", + "tokens_per_second": 20.469474642883824, + "peak_gpu_memory_mb": 5842.375, + "num_generated_tokens": 91, + "inference_time": 7.3469390869140625, + "cpu_usage": 34.7, + "cpu_core_utilization": [ + 47.5, + 53.6, + 13.8, + 23.8 + ], + "request_id": "103151cf-21a1-443f-a384-6de8311204ef" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_050641.json b/Direct_Transformers/outputs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_050641.json new file mode 100644 index 0000000000000000000000000000000000000000..1bf9eca74b9300f59736b5eb0a940f8531705900 --- /dev/null +++ b/Direct_Transformers/outputs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_050641.json @@ -0,0 +1,62 @@ +{ + "sample1_rotated_270.mp4": { + "response": "A man walks through a convenience store aisle, passing by various products on the shelves. He continues down the aisle, passing more items.", + "tokens_per_second": 10.331790492190137, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 27, + "inference_time": 10.039179563522339, + "cpu_usage": 54.4, + "cpu_core_utilization": [ + 52.5, + 52.4, + 54.7, + 58.0 + ], + "request_id": "8cd3d96d-c2de-4134-a862-35a3fcb1b555" + }, + "sample1_raw.mp4": { + "response": "A man wearing a blue shirt is standing at the counter of a convenience store. He is looking at the items on the counter and appears to be considering his options. Another man wearing a white shirt is also standing at the counter, looking at the items as well. They both seem to be shopping for groceries.", + "tokens_per_second": 11.553518987742192, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 62, + "inference_time": 13.80137324333191, + "cpu_usage": 52.7, + "cpu_core_utilization": [ + 38.7, + 95.7, + 37.9, + 38.4 + ], + "request_id": "5d471359-940e-4bd4-b596-f4a4624c793d" + }, + "sample1_rotated_180.mp4": { + "response": "A man walks into a convenience store. 
He approaches the counter and interacts with an employee behind the counter. The employee takes an item from the man and hands it back to him. The man then walks out of the store.", + "tokens_per_second": 11.406962184669279, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 45, + "inference_time": 7.817859888076782, + "cpu_usage": 36.0, + "cpu_core_utilization": [ + 63.3, + 17.0, + 45.0, + 18.4 + ], + "request_id": "f4bebc66-b044-41e2-93f3-37223b475f3b" + }, + "sample1_rotated_90.mp4": { + "response": "A man in a blue shirt is standing at a counter in a convenience store. He picks up a package from the counter and examines it. He then places the package back on the counter and picks up another package. He looks at the package for a moment before placing it back on the counter. The man then walks away from the counter.", + "tokens_per_second": 11.491923072165466, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 68, + "inference_time": 9.764874935150146, + "cpu_usage": 33.5, + "cpu_core_utilization": [ + 11.3, + 73.9, + 12.9, + 35.9 + ], + "request_id": "4f0b4cd9-0146-48b2-9792-8e69c56ee3cf" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_053257.json b/Direct_Transformers/outputs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_053257.json new file mode 100644 index 0000000000000000000000000000000000000000..3ec7ad9af50a997c9a7ad657ab0551d7bd9f4435 --- /dev/null +++ b/Direct_Transformers/outputs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_053257.json @@ -0,0 +1,62 @@ +{ + "sample1_rotated_270.mp4": { + "response": "A man walks through a convenience store aisle, passing by various products on the shelves. 
He continues down the aisle, passing more items.", + "tokens_per_second": 14.911576721939884, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 27, + "inference_time": 6.9916605949401855, + "cpu_usage": 64.2, + "cpu_core_utilization": [ + 61.3, + 73.2, + 64.4, + 57.8 + ], + "request_id": "420e0f62-2b75-4084-9d43-f9b49d0fc06a" + }, + "sample1_raw.mp4": { + "response": "A man wearing a blue shirt is standing at the counter of a convenience store. He is looking at the items on the counter and appears to be considering his options. Another man wearing a white shirt is also standing at the counter, looking at the items as well. They both seem to be shopping for groceries.", + "tokens_per_second": 16.006104859630515, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 62, + "inference_time": 15.543591976165771, + "cpu_usage": 72.5, + "cpu_core_utilization": [ + 65.7, + 66.6, + 94.6, + 62.9 + ], + "request_id": "2927cd25-59cf-4a51-a0f5-b6c7a3382630" + }, + "sample1_rotated_180.mp4": { + "response": "A man walks into a convenience store. He approaches the counter and interacts with an employee behind the counter. The employee takes an item from the man and hands it back to him. The man then walks out of the store.", + "tokens_per_second": 16.048521811500464, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 45, + "inference_time": 6.634310722351074, + "cpu_usage": 35.1, + "cpu_core_utilization": [ + 17.8, + 17.3, + 91.5, + 13.4 + ], + "request_id": "84ae9443-0548-4aee-aaa3-ab8e2af789be" + }, + "sample1_rotated_90.mp4": { + "response": "A man in a blue shirt is standing at a counter in a convenience store. He picks up a package from the counter and examines it. He then places the package back on the counter and picks up another package. He looks at the package for a moment before placing it back on the counter. 
The man then walks away from the counter.", + "tokens_per_second": 16.138695170908093, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 68, + "inference_time": 8.05150556564331, + "cpu_usage": 34.1, + "cpu_core_utilization": [ + 38.0, + 14.0, + 72.5, + 11.7 + ], + "request_id": "e430b9af-6192-491b-8854-7fb9a375b31c" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_054107.json b/Direct_Transformers/outputs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_054107.json new file mode 100644 index 0000000000000000000000000000000000000000..bf5410e96e096039c923e37fb75da3f469d8e245 --- /dev/null +++ b/Direct_Transformers/outputs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_054107.json @@ -0,0 +1,62 @@ +{ + "sample1_rotated_270.mp4": { + "response": "A man walks through a convenience store aisle, passing by various products on the shelves. He continues down the aisle, passing more items.", + "tokens_per_second": 15.592137334361555, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 27, + "inference_time": 6.371844053268433, + "cpu_usage": 55.1, + "cpu_core_utilization": [ + 53.5, + 53.5, + 58.4, + 54.9 + ], + "request_id": "145be8a8-9764-4818-a9b2-684ef006547b" + }, + "sample1_raw.mp4": { + "response": "A man wearing a blue shirt is standing at the counter of a convenience store. He is looking at the items on the counter and appears to be considering his options. Another man wearing a white shirt is also standing at the counter, looking at the items as well. They both seem to be shopping for groceries.", + "tokens_per_second": 16.23932857727457, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 62, + "inference_time": 11.895183324813843, + "cpu_usage": 53.6, + "cpu_core_utilization": [ + 53.6, + 66.8, + 41.3, + 52.6 + ], + "request_id": "859ce3d5-f81a-43a0-afe3-d262192d6b84" + }, + "sample1_rotated_180.mp4": { + "response": "A man walks into a convenience store. 
He approaches the counter and interacts with an employee behind the counter. The employee takes an item from the man and hands it back to him. The man then walks out of the store.", + "tokens_per_second": 16.156670595758122, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 45, + "inference_time": 6.718708038330078, + "cpu_usage": 35.3, + "cpu_core_utilization": [ + 14.6, + 84.2, + 14.5, + 27.5 + ], + "request_id": "6737a709-d602-44c4-9693-a777732e9752" + }, + "sample1_rotated_90.mp4": { + "response": "A man in a blue shirt is standing at a counter in a convenience store. He picks up a package from the counter and examines it. He then places the package back on the counter and picks up another package. He looks at the package for a moment before placing it back on the counter. The man then walks away from the counter.", + "tokens_per_second": 16.20737149196603, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 68, + "inference_time": 8.09209156036377, + "cpu_usage": 33.9, + "cpu_core_utilization": [ + 17.3, + 20.2, + 42.4, + 55.7 + ], + "request_id": "726a0f65-094a-4f6f-a0ff-4d139e96ac66" + } +} \ No newline at end of file diff --git a/Direct_Transformers/outputs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_055712.json b/Direct_Transformers/outputs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_055712.json new file mode 100644 index 0000000000000000000000000000000000000000..22e831e0e1887ce27198b23435145a08636980df --- /dev/null +++ b/Direct_Transformers/outputs/Qwen2.5-VL-3B-Instruct-AWQ/20250822_055712.json @@ -0,0 +1,77 @@ +{ + "sample2.mp4": { + "response": "A customer is shopping at a convenience store. The cashier is scanning items and ringing them up. The customer is selecting items from the display counter. 
The customer then walks away with their purchases.", + "tokens_per_second": 16.190728175434586, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 38, + "inference_time": 9.969894647598267, + "cpu_usage": 43.4, + "cpu_core_utilization": [ + 47.7, + 39.3, + 46.6, + 40.2 + ], + "request_id": "b1f563ec-1a2c-4af7-bf88-7e54521bd16f" + }, + "sample1_rotated_270.mp4": { + "response": "A man walks through a convenience store aisle, passing by various products on the shelves. He continues down the aisle, passing more items.", + "tokens_per_second": 15.958396581899965, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 27, + "inference_time": 5.564875364303589, + "cpu_usage": 36.6, + "cpu_core_utilization": [ + 15.3, + 52.5, + 18.4, + 60.0 + ], + "request_id": "f443de0e-ebd6-42c5-a8f3-428e02b21575" + }, + "sample1_raw.mp4": { + "response": "A man wearing a blue shirt is standing at the counter of a convenience store. He is looking at the items on the counter and appears to be considering his options. Another man wearing a white shirt is also standing at the counter, looking at the items as well. They both seem to be shopping for groceries.", + "tokens_per_second": 15.934521405551505, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 62, + "inference_time": 11.964012145996094, + "cpu_usage": 53.3, + "cpu_core_utilization": [ + 38.6, + 41.4, + 39.9, + 93.3 + ], + "request_id": "b6079f9c-278c-459a-a37e-332d5ffdd6fd" + }, + "sample1_rotated_180.mp4": { + "response": "A man walks into a convenience store. He approaches the counter and interacts with an employee behind the counter. The employee takes an item from the man and hands it back to him. 
The man then walks out of the store.", + "tokens_per_second": 16.11702401546876, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 45, + "inference_time": 6.645972490310669, + "cpu_usage": 35.0, + "cpu_core_utilization": [ + 13.4, + 81.1, + 15.8, + 29.8 + ], + "request_id": "7480d34f-ab80-40de-bd90-e1b1e80ba723" + }, + "sample1_rotated_90.mp4": { + "response": "A man in a blue shirt is standing at a counter in a convenience store. He picks up a package from the counter and examines it. He then places the package back on the counter and picks up another package. He looks at the package for a moment before placing it back on the counter. The man then walks away from the counter.", + "tokens_per_second": 15.820068362733391, + "peak_gpu_memory_mb": 6678.375, + "num_generated_tokens": 68, + "inference_time": 8.152392864227295, + "cpu_usage": 34.6, + "cpu_core_utilization": [ + 13.5, + 39.0, + 14.4, + 71.4 + ], + "request_id": "02f2404d-bf6d-4be9-aa69-471222e4550f" + } +} \ No newline at end of file diff --git a/Direct_Transformers/rotate.py b/Direct_Transformers/rotate.py new file mode 100644 index 0000000000000000000000000000000000000000..dbd0fd75a770cc597b3069c61e38d60e8ad4b31b --- /dev/null +++ b/Direct_Transformers/rotate.py @@ -0,0 +1,93 @@ +import cv2 +import os + +def rotate_video(input_path, output_path, rotation_angle=90): + """ + 旋转视频文件 + :param input_path: 输入视频路径 + :param output_path: 输出视频路径 + :param rotation_angle: 旋转角度 (90, 180, 270) + :return: 是否成功 + """ + try: + # 打开视频文件 + cap = cv2.VideoCapture(input_path) + + if not cap.isOpened(): + print(f"无法打开视频文件: {input_path}") + return False + + # 获取视频属性 + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = cap.get(cv2.CAP_PROP_FPS) + total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + + print(f"视频信息: {width}x{height}, FPS: {fps}, 总帧数: {total_frames}") + + # 根据旋转角度调整输出视频的宽高 + if rotation_angle == 90 or rotation_angle == 270: + output_width = 
height + output_height = width + else: + output_width = width + output_height = height + + # 创建视频写入器 + fourcc = cv2.VideoWriter_fourcc(*'mp4v') + out = cv2.VideoWriter(output_path, fourcc, fps, (output_width, output_height)) + + frame_count = 0 + while True: + ret, frame = cap.read() + if not ret: + break + + # 旋转帧 + if rotation_angle == 90: + rotated = cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE) + elif rotation_angle == 180: + rotated = cv2.rotate(frame, cv2.ROTATE_180) + elif rotation_angle == 270: + rotated = cv2.rotate(frame, cv2.ROTATE_90_COUNTERCLOCKWISE) + else: + print("不支持的旋转角度,请使用90, 180, 或270") + return False + + # 写入旋转后的帧 + out.write(rotated) + frame_count += 1 + + if frame_count % 100 == 0: + print(".1f") + + # 释放资源 + cap.release() + out.release() + + print(f"视频旋转完成! 已处理 {frame_count} 帧") + print(f"输出文件: {output_path}") + return True + + except Exception as e: + print(f"处理视频时出错: {str(e)}") + return False + +if __name__ == "__main__": + # 设置输入输出路径 + input_video = "videos/sample1_raw.mp4" + output_video = "videos/sample1_rotated_270.mp4" + + # 检查输入文件是否存在 + if not os.path.exists(input_video): + print(f"输入视频文件不存在: {input_video}") + print("请确保视频文件在videos目录下") + else: + # 旋转视频 (默认90度顺时针) + print("开始旋转视频...") + success = rotate_video(input_video, output_video, 270) + + if success: + print("视频旋转成功!") + else: + print("视频旋转失败!") diff --git a/Direct_Transformers/run.sh b/Direct_Transformers/run.sh new file mode 100644 index 0000000000000000000000000000000000000000..b0b6b42e55ce74add894b0463dde32c095c8daa1 --- /dev/null +++ b/Direct_Transformers/run.sh @@ -0,0 +1,4 @@ +bash scripts/run_minicpm.sh +bash scripts/run_minicpm_int4.sh +bash scripts/run_qwen.sh +bash scripts/run_qwen2_5.sh \ No newline at end of file diff --git a/Direct_Transformers/scripts/run_gemma.sh b/Direct_Transformers/scripts/run_gemma.sh new file mode 100644 index 0000000000000000000000000000000000000000..415adffc5b9a484ec8ea3585d467422733096dcf --- /dev/null +++ 
b/Direct_Transformers/scripts/run_gemma.sh @@ -0,0 +1,2 @@ +python infer.py \ +--model_path google/gemma-3-4b-it \ \ No newline at end of file diff --git a/Direct_Transformers/scripts/run_lfm.sh b/Direct_Transformers/scripts/run_lfm.sh new file mode 100644 index 0000000000000000000000000000000000000000..9d0cc9a7e8aebc6a70098d7dcd96eaeea618fa35 --- /dev/null +++ b/Direct_Transformers/scripts/run_lfm.sh @@ -0,0 +1,2 @@ +python infer.py \ +--model_path LiquidAI/LFM2-VL-1.6B \ \ No newline at end of file diff --git a/Direct_Transformers/scripts/run_minicpm.sh b/Direct_Transformers/scripts/run_minicpm.sh new file mode 100644 index 0000000000000000000000000000000000000000..0ba64a7116ef124bc4667c68fe0816e015cd27d4 --- /dev/null +++ b/Direct_Transformers/scripts/run_minicpm.sh @@ -0,0 +1,2 @@ +python infer.py \ +--model_path openbmb/MiniCPM-V-4 \ diff --git a/Direct_Transformers/scripts/run_minicpm_awq.sh b/Direct_Transformers/scripts/run_minicpm_awq.sh new file mode 100644 index 0000000000000000000000000000000000000000..a2989b7ae62de4f109fe17210bfa914e2bd26fff --- /dev/null +++ b/Direct_Transformers/scripts/run_minicpm_awq.sh @@ -0,0 +1,2 @@ +python infer.py \ +--model_path openbmb/MiniCPM-V-4-AWQ \ \ No newline at end of file diff --git a/Direct_Transformers/scripts/run_minicpm_int4.sh b/Direct_Transformers/scripts/run_minicpm_int4.sh new file mode 100644 index 0000000000000000000000000000000000000000..08bd24d48d6aa3315bcf1d24d50966f329d77c81 --- /dev/null +++ b/Direct_Transformers/scripts/run_minicpm_int4.sh @@ -0,0 +1,27 @@ +python infer.py \ +--model_path openbmb/MiniCPM-V-4-int4 \ +--sampling_rate 5 + +python infer.py \ +--model_path openbmb/MiniCPM-V-4-int4 \ +--sampling_rate 10 + +python infer.py \ +--model_path openbmb/MiniCPM-V-4-int4 \ +--sampling_rate 20 + +python infer.py \ +--model_path openbmb/MiniCPM-V-4-int4 \ +--sampling_rate 30 + +python infer.py \ +--model_path openbmb/MiniCPM-V-4-int4 \ +--sampling_rate 40 + +python infer.py \ +--model_path 
openbmb/MiniCPM-V-4-int4 \ +--sampling_rate 50 + +python infer.py \ +--model_path openbmb/MiniCPM-V-4-int4 \ +--sampling_rate 60 \ No newline at end of file diff --git a/Direct_Transformers/scripts/run_qwen.sh b/Direct_Transformers/scripts/run_qwen.sh new file mode 100644 index 0000000000000000000000000000000000000000..fc75312b0c942ba7f80112f9911fec80ea7c40bd --- /dev/null +++ b/Direct_Transformers/scripts/run_qwen.sh @@ -0,0 +1,2 @@ +python infer.py \ +--model_path Qwen/Qwen2-VL-2B-Instruct-AWQ \ \ No newline at end of file diff --git a/Direct_Transformers/scripts/run_qwen2_5.sh b/Direct_Transformers/scripts/run_qwen2_5.sh new file mode 100644 index 0000000000000000000000000000000000000000..208c39c1fd7a9a3aeef199fd5cd9b092f24027df --- /dev/null +++ b/Direct_Transformers/scripts/run_qwen2_5.sh @@ -0,0 +1,2 @@ +python infer.py \ +--model_path Qwen/Qwen2.5-VL-3B-Instruct-AWQ \ diff --git a/Direct_Transformers/video_processor.py b/Direct_Transformers/video_processor.py new file mode 100644 index 0000000000000000000000000000000000000000..cba27cea181d9264edbfcfa7020f896d2c097c0e --- /dev/null +++ b/Direct_Transformers/video_processor.py @@ -0,0 +1,124 @@ +import cv2 +import numpy as np +import base64 +from typing import List +from enum import Enum +from skimage.metrics import structural_similarity as ssim + +class FrameSamplingMethod(str, Enum): + UNIFORM = "uniform" + CONTENT_AWARE = "content_aware" + +def extract_frames( + video_path: str, + method: FrameSamplingMethod, + sampling_rate: int +) -> List[np.ndarray]: + """ + 从视频中提取帧。 + 对于UNIFORM方法,sampling_rate表示要提取的总帧数。 + 对于CONTENT_AWARE方法,sampling_rate现在也表示要提取的总帧数,但会选择变化最大的帧。 + """ + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + raise IOError(f"Cannot open video file: {video_path}") + + frames = [] + total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + target_size = (420, 280) # (width, height) + + if method == FrameSamplingMethod.UNIFORM: + if sampling_rate <= 0: + cap.release() + return [] + + # 
如果请求的帧数大于总帧数,则返回所有帧 + if sampling_rate >= total_frames: + while True: + ret, frame = cap.read() + if not ret: + break + resized_frame = cv2.resize(frame, target_size) + frames.append(resized_frame) + cap.release() + return frames + + # 计算采样间隔 + step = total_frames / sampling_rate + for i in range(sampling_rate): + frame_index = int(i * step) + cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index) + ret, frame = cap.read() + if ret: + resized_frame = cv2.resize(frame, target_size) + frames.append(resized_frame) + + elif method == FrameSamplingMethod.CONTENT_AWARE: + if sampling_rate <= 0: + cap.release() + return [] + + # 如果视频总帧数少于或等于请求的帧数,则返回所有帧 + if total_frames <= sampling_rate: + while True: + ret, frame = cap.read() + if not ret: + break + resized_frame = cv2.resize(frame, target_size) + frames.append(resized_frame) + cap.release() + return frames + + # --- Pass 1: 计算所有相邻帧的SSIM分数 --- + ssim_scores = [] + cap.set(cv2.CAP_PROP_POS_FRAMES, 0) + ret, prev_frame = cap.read() + if not ret: + cap.release() + return [] + + prev_frame_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY) + + for i in range(1, total_frames): + ret, current_frame = cap.read() + if not ret: + break + + current_frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY) + score, _ = ssim(prev_frame_gray, current_frame_gray, full=True) + ssim_scores.append((score, i)) # 存储(ssim_score, frame_index) + prev_frame_gray = current_frame_gray + + # --- 选择变化最大的 n-1 帧 --- + # 按SSIM分数升序排序 (分数越低,差异越大) + ssim_scores.sort(key=lambda x: x[0]) + + # 选择分数最低的 n-1 帧的索引 + selected_indices = {score[1] for score in ssim_scores[:sampling_rate - 1]} + # 始终包括第一帧 (index 0) + selected_indices.add(0) + + # --- Pass 2: 根据索引提取帧 --- + sorted_indices = sorted(list(selected_indices)) + for idx in sorted_indices: + cap.set(cv2.CAP_PROP_POS_FRAMES, idx) + ret, frame = cap.read() + if ret: + resized_frame = cv2.resize(frame, target_size) + frames.append(resized_frame) + + cap.release() + return frames + +def 
encode_frames_to_base64(frames: List[np.ndarray]) -> List[str]: + """ + 将OpenCV帧列表编码为base64字符串列表。 + """ + base64_frames = [] + for frame in frames: + # 将帧编码为JPEG格式 + _, buffer = cv2.imencode('.jpg', frame) + # 将缓冲区字节转换为base64字符串 + base64_str = base64.b64encode(buffer).decode('utf-8') + base64_frames.append(base64_str) + return base64_frames \ No newline at end of file diff --git a/Direct_Transformers/videos/sample1_raw.mp4 b/Direct_Transformers/videos/sample1_raw.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..43542b9586690e46395bcc69b05e8dcdcc8f88b0 --- /dev/null +++ b/Direct_Transformers/videos/sample1_raw.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:522f9168c2de3f62db3f3c35aa107577178b5e4453e8901e3310ff728c76adfa +size 49402451 diff --git a/Direct_Transformers/videos/sample1_rotated_180.mp4 b/Direct_Transformers/videos/sample1_rotated_180.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..17ce841fdaabaeefd2f9bd60c413563e2c26dcc6 --- /dev/null +++ b/Direct_Transformers/videos/sample1_rotated_180.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:393e904873071aacc6392abc0ded96cfa33d8c10cac3069ccd7d6b1824c4e2a0 +size 61533754 diff --git a/Direct_Transformers/videos/sample1_rotated_270.mp4 b/Direct_Transformers/videos/sample1_rotated_270.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..8cc3785548b037729e9037618f888126e58e46fa --- /dev/null +++ b/Direct_Transformers/videos/sample1_rotated_270.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3e94508afff9b2eec34f6f2480b7ba0d8bd2dd4674e4d0f9dbf9a45ff57fed3 +size 60378289 diff --git a/Direct_Transformers/videos/sample1_rotated_90.mp4 b/Direct_Transformers/videos/sample1_rotated_90.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..59f2a002cff1679087204033c787dfdac6f13c28 --- /dev/null +++ b/Direct_Transformers/videos/sample1_rotated_90.mp4 @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ca1d66d7d2c0088bbd698d47a7254ee33a08b2d9fbf9ee36310f5541e01a478 +size 61850361 diff --git a/Direct_Transformers/videos/sample2.mp4 b/Direct_Transformers/videos/sample2.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..ec424eeb4c053dfc5fa76f52f16e13fd8c76c3bf --- /dev/null +++ b/Direct_Transformers/videos/sample2.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1543f8ab51ee52ddc833ceb68d590ef151e66bc205c0fb818ceb2edc4eb0e2e0 +size 27720274 diff --git a/SGLang_minicpm/1.py b/SGLang_minicpm/1.py new file mode 100644 index 0000000000000000000000000000000000000000..076a8336a18c3e000df6ea320fb06a766820279d --- /dev/null +++ b/SGLang_minicpm/1.py @@ -0,0 +1,9 @@ +from huggingface_hub import HfApi + +api = HfApi() + +api.upload_folder( + folder_path="/mnt/data/xiuying/Code", + repo_id="Wangtwohappy/T4_code", + repo_type="model" +) diff --git a/SGLang_minicpm/2.py b/SGLang_minicpm/2.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ollama_minicpm/__pycache__/video_processor.cpython-311.pyc b/ollama_minicpm/__pycache__/video_processor.cpython-311.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7094f8fb67dc4c4f9eb3f2dd48a4b47a611b2abf Binary files /dev/null and b/ollama_minicpm/__pycache__/video_processor.cpython-311.pyc differ diff --git a/ollama_minicpm/app.py b/ollama_minicpm/app.py new file mode 100644 index 0000000000000000000000000000000000000000..ccad4dc65c58e7968ceb074846c9beb72001e181 --- /dev/null +++ b/ollama_minicpm/app.py @@ -0,0 +1,141 @@ +import os +import uuid +import base64 +import shutil +from typing import List +import time +import cv2 +import psutil +import ollama +import uvicorn +from fastapi import FastAPI, File, UploadFile, Form, HTTPException +from fastapi.responses import JSONResponse + +try: + import pynvml + pynvml.nvmlInit() + 
GPU_METRICS_AVAILABLE = True +except (ImportError, pynvml.NVMLError): + GPU_METRICS_AVAILABLE = False + +from video_processor import extract_frames, FrameSamplingMethod, encode_frames_to_base64 + +import logging +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--model_name", type=str, default="openbmb/minicpm-v4:latest") +args = parser.parse_args() + +os.makedirs(f'logs/{args.model_name}', exist_ok=True) + +# 初始化FastAPI应用 +app = FastAPI(title = "Video Inference Service") + +# 定义一个临时目录来存储上传的视频 +TEMP_VIDEO_DIR = "temp_videos" +os.makedirs(TEMP_VIDEO_DIR, exist_ok=True) + +# 使用当前时间戳生成唯一的日志文件名 +log_filename = f"logs/{args.model_name}/{time.strftime('%Y%m%d_%H%M%S')}.log" +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S', filename=log_filename, filemode='a') + +@app.post("/video-inference/") +async def video_inference( + prompt: str = Form(...), + video_path: str = Form(...), + sampling_method: str = Form(...), + sampling_rate: int = Form(5), + ): + """ + 接收视频和文本提示,进行推理并返回结果。 + - prompt: 用户的问题。 + - video_file: 上传的视频文件。 + - sampling_method: 帧采样方法 ('uniform' 或 'content_aware')。 + - sampling_rate: 采样率或阈值。 + """ + try: + request_start_time = time.time() + request_id = str(uuid.uuid4()) + logging.info(f"[{request_id}] Received new video inference request. 
Prompt: '{prompt}', Video: '{video_path}'") + + # 验证上传的文件类型 + if not os.path.exists(video_path): + raise FileNotFoundError(f"Video file not found: {video_path}") + + if not video_path.lower().endswith(('.mp4', '.avi', '.mov', '.mkv')): + logging.warning(f"[{request_id}] File '{video_path}' may not be a video file.") + + # 转换采样方法字符串为枚举 + sampling_method_map = { + "CONTENT_AWARE": FrameSamplingMethod.CONTENT_AWARE, + "UNIFORM": FrameSamplingMethod.UNIFORM, + } + sampling_method = sampling_method_map.get(sampling_method, FrameSamplingMethod.CONTENT_AWARE) + + # 创建临时目录 + temp_frame_dir = os.path.join(TEMP_VIDEO_DIR, request_id) + os.makedirs(temp_frame_dir, exist_ok=True) + except Exception as e: + logging.error(f"[{request_id}] An error occurred during processing: {str(e)}", exc_info=True) + raise HTTPException(status_code=500, detail=f"An error occurred during processing: {str(e)}") + + try: + logging.info(f"[{request_id}] Extracting frames using method: {sampling_method.value}, rate/threshold: {sampling_rate}") + + frames = extract_frames(video_path, sampling_method, sampling_rate) + if not frames: + raise ValueError(f"Could not extract any frames from the video: {video_path}") + + logging.info(f"[{request_id}] Extracted {len(frames)} frames successfully. Saving to temporary files...") + # 2. 将帧编码为Base64 + base64_frames = encode_frames_to_base64(frames) + logging.info(f"[{request_id}] Encoded {len(base64_frames)} frames to Base64.") + + # 3. 构造面向视频的提示 + final_prompt = prompt + + # 4. 调用Ollama API + try: + logging.info(f"[{request_id}] Sending request to Ollama model '{args.model_name}'...") + + # 初始化CPU使用率测量,以便我们测量Ollama调用期间的平均使用率 + psutil.cpu_percent(interval=None) + psutil.cpu_percent(interval=None, percpu=True) + + ollama_start_time = time.time() + response = ollama.chat( + model=args.model_name, # 使用我们创建的自定义模型! 
+ messages=[ + { + 'role': 'user', + 'content': final_prompt, + 'images': base64_frames, + } + ] + ) + ollama_end_time = time.time() + + # 在Ollama调用后立即获取CPU使用率,以获得准确的平均值 + cpu_usage = psutil.cpu_percent(interval=None) + cpu_core_utilization = psutil.cpu_percent(interval=None, percpu=True) + + logging.info(f"[{request_id}] Received response from Ollama successfully.") + return response + + except Exception as ollama_error: + # 更具体地处理Ollama的错误 + logging.error(f"[{request_id}] Ollama inference failed: {str(ollama_error)}", exc_info=True) + raise HTTPException(status_code=503, detail=f"Ollama inference failed: {str(ollama_error)}") + + except Exception as e: + logging.error(f"[{request_id}] An error occurred during processing: {str(e)}", exc_info=True) + raise HTTPException(status_code=500, detail=f"An error occurred during processing: {str(e)}") + finally: + # 清理临时文件 + if os.path.exists(temp_frame_dir): + shutil.rmtree(temp_frame_dir) + logging.info(f"[{request_id}] Cleaned up temporary file: {temp_frame_dir}") + +if __name__ == "__main__": + uvicorn.run(app, host="0.0.0.0", port=8008) \ No newline at end of file diff --git a/ollama_minicpm/logs/openbmb/minicpm-v4:latest/20250822_220204.log b/ollama_minicpm/logs/openbmb/minicpm-v4:latest/20250822_220204.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ollama_minicpm/logs/openbmb/minicpm-v4:latest/20250822_220334.log b/ollama_minicpm/logs/openbmb/minicpm-v4:latest/20250822_220334.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ollama_minicpm/logs/openbmb/minicpm-v4:latest/20250822_220642.log b/ollama_minicpm/logs/openbmb/minicpm-v4:latest/20250822_220642.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ollama_minicpm/logs/openbmb/minicpm-v4:latest/20250822_220946.log 
b/ollama_minicpm/logs/openbmb/minicpm-v4:latest/20250822_220946.log new file mode 100644 index 0000000000000000000000000000000000000000..c1d6d9d59319bf8fbb39e69089c48fd77f78d3d1 --- /dev/null +++ b/ollama_minicpm/logs/openbmb/minicpm-v4:latest/20250822_220946.log @@ -0,0 +1,475 @@ +2025-08-22 22:12:41 - INFO - [cc55d1d6-ddba-4e4f-b18c-0ee148480391] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/test/videos/sample1_raw.mp4' +2025-08-22 22:12:41 - INFO - [cc55d1d6-ddba-4e4f-b18c-0ee148480391] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 22:12:48 - INFO - [cc55d1d6-ddba-4e4f-b18c-0ee148480391] Extracted 30 frames successfully. Saving to temporary files... +2025-08-22 22:12:48 - INFO - [cc55d1d6-ddba-4e4f-b18c-0ee148480391] Encoded 30 frames to Base64. +2025-08-22 22:12:48 - INFO - [cc55d1d6-ddba-4e4f-b18c-0ee148480391] Sending request to Ollama model 'openbmb/minicpm-v4:latest'... 
+2025-08-22 22:12:48 - ERROR - [cc55d1d6-ddba-4e4f-b18c-0ee148480391] Ollama inference failed: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list] +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/ollama_minicpm/app.py", line 107, in video_inference + response = ollama.chat( + ^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 348, in chat + messages=list(_copy_messages(messages)), + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in _copy_messages + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1148, in _copy_images + yield image if isinstance(image, Image) else 
Image(value=image) + ^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/pydantic/main.py", line 253, in __init__ + validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +pydantic_core._pydantic_core.ValidationError: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list] +2025-08-22 22:12:48 - ERROR - [cc55d1d6-ddba-4e4f-b18c-0ee148480391] An error occurred during processing: 503: Ollama inference failed: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), 
str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list] +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/ollama_minicpm/app.py", line 107, in video_inference + response = ollama.chat( + ^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 348, in chat + messages=list(_copy_messages(messages)), + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in _copy_messages + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1148, in _copy_images + yield image if isinstance(image, Image) else Image(value=image) + ^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/pydantic/main.py", line 253, in __init__ + validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +pydantic_core._pydantic_core.ValidationError: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, 
input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list] + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/ollama_minicpm/app.py", line 129, in video_inference + raise HTTPException(status_code=503, detail=f"Ollama inference failed: {str(ollama_error)}") +fastapi.exceptions.HTTPException: 503: Ollama inference failed: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list] +2025-08-22 22:12:48 - INFO - [cc55d1d6-ddba-4e4f-b18c-0ee148480391] Cleaned up temporary file: temp_videos/cc55d1d6-ddba-4e4f-b18c-0ee148480391 +2025-08-22 22:12:48 - INFO - [97584469-91ba-42d7-a5e1-c1ccf4e78c53] Received new video inference 
request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/test/videos/sample1_rotated_180.mp4' +2025-08-22 22:12:48 - INFO - [97584469-91ba-42d7-a5e1-c1ccf4e78c53] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 22:12:49 - INFO - [97584469-91ba-42d7-a5e1-c1ccf4e78c53] Extracted 30 frames successfully. Saving to temporary files... +2025-08-22 22:12:49 - INFO - [97584469-91ba-42d7-a5e1-c1ccf4e78c53] Encoded 30 frames to Base64. +2025-08-22 22:12:49 - INFO - [97584469-91ba-42d7-a5e1-c1ccf4e78c53] Sending request to Ollama model 'openbmb/minicpm-v4:latest'... +2025-08-22 22:12:49 - ERROR - [97584469-91ba-42d7-a5e1-c1ccf4e78c53] Ollama inference failed: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list] +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/ollama_minicpm/app.py", line 107, in video_inference + response = ollama.chat( + ^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 348, in chat + 
messages=list(_copy_messages(messages)), + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in _copy_messages + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1148, in _copy_images + yield image if isinstance(image, Image) else Image(value=image) + ^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/pydantic/main.py", line 253, in __init__ + validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +pydantic_core._pydantic_core.ValidationError: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list] +2025-08-22 
22:12:49 - ERROR - [97584469-91ba-42d7-a5e1-c1ccf4e78c53] An error occurred during processing: 503: Ollama inference failed: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list] +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/ollama_minicpm/app.py", line 107, in video_inference + response = ollama.chat( + ^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 348, in chat + messages=list(_copy_messages(messages)), + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in _copy_messages + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1148, in _copy_images + yield image if 
isinstance(image, Image) else Image(value=image) + ^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/pydantic/main.py", line 253, in __init__ + validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +pydantic_core._pydantic_core.ValidationError: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list] + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/ollama_minicpm/app.py", line 129, in video_inference + raise HTTPException(status_code=503, detail=f"Ollama inference failed: {str(ollama_error)}") +fastapi.exceptions.HTTPException: 503: Ollama inference failed: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], 
input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list] +2025-08-22 22:12:49 - INFO - [97584469-91ba-42d7-a5e1-c1ccf4e78c53] Cleaned up temporary file: temp_videos/97584469-91ba-42d7-a5e1-c1ccf4e78c53 +2025-08-22 22:12:49 - INFO - [f7e97481-d6c4-42a1-b5d6-6234ec065e4a] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/test/videos/sample1_rotated_270.mp4' +2025-08-22 22:12:49 - INFO - [f7e97481-d6c4-42a1-b5d6-6234ec065e4a] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 22:12:51 - INFO - [f7e97481-d6c4-42a1-b5d6-6234ec065e4a] Extracted 30 frames successfully. Saving to temporary files... +2025-08-22 22:12:51 - INFO - [f7e97481-d6c4-42a1-b5d6-6234ec065e4a] Encoded 30 frames to Base64. +2025-08-22 22:12:51 - INFO - [f7e97481-d6c4-42a1-b5d6-6234ec065e4a] Sending request to Ollama model 'openbmb/minicpm-v4:latest'... 
+2025-08-22 22:12:51 - ERROR - [f7e97481-d6c4-42a1-b5d6-6234ec065e4a] Ollama inference failed: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list] +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/ollama_minicpm/app.py", line 107, in video_inference + response = ollama.chat( + ^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 348, in chat + messages=list(_copy_messages(messages)), + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in _copy_messages + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1148, in _copy_images + yield image if isinstance(image, Image) else 
Image(value=image) + ^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/pydantic/main.py", line 253, in __init__ + validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +pydantic_core._pydantic_core.ValidationError: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list] +2025-08-22 22:12:51 - ERROR - [f7e97481-d6c4-42a1-b5d6-6234ec065e4a] An error occurred during processing: 503: Ollama inference failed: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), 
str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list] +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/ollama_minicpm/app.py", line 107, in video_inference + response = ollama.chat( + ^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 348, in chat + messages=list(_copy_messages(messages)), + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in _copy_messages + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1148, in _copy_images + yield image if isinstance(image, Image) else Image(value=image) + ^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/pydantic/main.py", line 253, in __init__ + validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +pydantic_core._pydantic_core.ValidationError: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, 
input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list] + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/ollama_minicpm/app.py", line 129, in video_inference + raise HTTPException(status_code=503, detail=f"Ollama inference failed: {str(ollama_error)}") +fastapi.exceptions.HTTPException: 503: Ollama inference failed: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list] +2025-08-22 22:12:51 - INFO - [f7e97481-d6c4-42a1-b5d6-6234ec065e4a] Cleaned up temporary file: temp_videos/f7e97481-d6c4-42a1-b5d6-6234ec065e4a +2025-08-22 22:12:51 - INFO - [8a73a6c5-0ab7-489f-8fe0-a98ff599aca5] Received new video inference 
request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/test/videos/sample1_rotated_90.mp4' +2025-08-22 22:12:51 - INFO - [8a73a6c5-0ab7-489f-8fe0-a98ff599aca5] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 22:12:52 - INFO - [8a73a6c5-0ab7-489f-8fe0-a98ff599aca5] Extracted 30 frames successfully. Saving to temporary files... +2025-08-22 22:12:52 - INFO - [8a73a6c5-0ab7-489f-8fe0-a98ff599aca5] Encoded 30 frames to Base64. +2025-08-22 22:12:52 - INFO - [8a73a6c5-0ab7-489f-8fe0-a98ff599aca5] Sending request to Ollama model 'openbmb/minicpm-v4:latest'... +2025-08-22 22:12:52 - ERROR - [8a73a6c5-0ab7-489f-8fe0-a98ff599aca5] Ollama inference failed: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list] +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/ollama_minicpm/app.py", line 107, in video_inference + response = ollama.chat( + ^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 348, in chat + 
messages=list(_copy_messages(messages)), + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in _copy_messages + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1148, in _copy_images + yield image if isinstance(image, Image) else Image(value=image) + ^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/pydantic/main.py", line 253, in __init__ + validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +pydantic_core._pydantic_core.ValidationError: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list] +2025-08-22 
22:12:52 - ERROR - [8a73a6c5-0ab7-489f-8fe0-a98ff599aca5] An error occurred during processing: 503: Ollama inference failed: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list] +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/ollama_minicpm/app.py", line 107, in video_inference + response = ollama.chat( + ^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 348, in chat + messages=list(_copy_messages(messages)), + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in _copy_messages + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1148, in _copy_images + yield image if 
isinstance(image, Image) else Image(value=image) + ^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/pydantic/main.py", line 253, in __init__ + validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +pydantic_core._pydantic_core.ValidationError: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list] + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/ollama_minicpm/app.py", line 129, in video_inference + raise HTTPException(status_code=503, detail=f"Ollama inference failed: {str(ollama_error)}") +fastapi.exceptions.HTTPException: 503: Ollama inference failed: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], 
input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list] +2025-08-22 22:12:52 - INFO - [8a73a6c5-0ab7-489f-8fe0-a98ff599aca5] Cleaned up temporary file: temp_videos/8a73a6c5-0ab7-489f-8fe0-a98ff599aca5 +2025-08-22 22:12:52 - INFO - [4e375d04-e95b-4f36-910f-2aba3b4bdcff] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/test/videos/sample2.mp4' +2025-08-22 22:12:52 - INFO - [4e375d04-e95b-4f36-910f-2aba3b4bdcff] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 22:12:56 - INFO - [4e375d04-e95b-4f36-910f-2aba3b4bdcff] Extracted 30 frames successfully. Saving to temporary files... +2025-08-22 22:12:56 - INFO - [4e375d04-e95b-4f36-910f-2aba3b4bdcff] Encoded 30 frames to Base64. +2025-08-22 22:12:56 - INFO - [4e375d04-e95b-4f36-910f-2aba3b4bdcff] Sending request to Ollama model 'openbmb/minicpm-v4:latest'... 
+2025-08-22 22:12:56 - ERROR - [4e375d04-e95b-4f36-910f-2aba3b4bdcff] Ollama inference failed: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list] +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/ollama_minicpm/app.py", line 107, in video_inference + response = ollama.chat( + ^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 348, in chat + messages=list(_copy_messages(messages)), + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in _copy_messages + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1148, in _copy_images + yield image if isinstance(image, Image) else 
Image(value=image) + ^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/pydantic/main.py", line 253, in __init__ + validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +pydantic_core._pydantic_core.ValidationError: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list] +2025-08-22 22:12:56 - ERROR - [4e375d04-e95b-4f36-910f-2aba3b4bdcff] An error occurred during processing: 503: Ollama inference failed: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), 
str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list] +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/ollama_minicpm/app.py", line 107, in video_inference + response = ollama.chat( + ^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 348, in chat + messages=list(_copy_messages(messages)), + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in _copy_messages + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1154, in + {k: list(_copy_images(v)) if k == 'images' else v for k, v in dict(message).items() if v}, + ^^^^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/ollama/_client.py", line 1148, in _copy_images + yield image if isinstance(image, Image) else Image(value=image) + ^^^^^^^^^^^^^^^^^^ + File "/home/xiuying/miniconda3/envs/gptq/lib/python3.11/site-packages/pydantic/main.py", line 253, in __init__ + validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +pydantic_core._pydantic_core.ValidationError: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, 
input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list] + +During handling of the above exception, another exception occurred: + +Traceback (most recent call last): + File "/mnt/data/xiuying/Code/ollama_minicpm/app.py", line 129, in video_inference + raise HTTPException(status_code=503, detail=f"Ollama inference failed: {str(ollama_error)}") +fastapi.exceptions.HTTPException: 503: Ollama inference failed: 3 validation errors for Image +value.str + Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/string_type +value.bytes + Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list] + For further information visit https://errors.pydantic.dev/2.11/v/bytes_type +value.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]] + Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list] +2025-08-22 22:12:56 - INFO - [4e375d04-e95b-4f36-910f-2aba3b4bdcff] Cleaned up temporary file: temp_videos/4e375d04-e95b-4f36-910f-2aba3b4bdcff diff --git a/ollama_minicpm/logs/openbmb/minicpm-v4:latest/20250822_221425.log 
b/ollama_minicpm/logs/openbmb/minicpm-v4:latest/20250822_221425.log new file mode 100644 index 0000000000000000000000000000000000000000..5e7f7742c67ee08df1904dfa62948450fb8d7e58 --- /dev/null +++ b/ollama_minicpm/logs/openbmb/minicpm-v4:latest/20250822_221425.log @@ -0,0 +1,40 @@ +2025-08-22 22:14:29 - INFO - [355c89f9-f5cd-4608-8f2b-edf7402759ae] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/test/videos/sample1_raw.mp4' +2025-08-22 22:14:29 - INFO - [355c89f9-f5cd-4608-8f2b-edf7402759ae] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 22:14:34 - INFO - [355c89f9-f5cd-4608-8f2b-edf7402759ae] Extracted 30 frames successfully. Saving to temporary files... +2025-08-22 22:14:34 - INFO - [355c89f9-f5cd-4608-8f2b-edf7402759ae] Encoded 30 frames to Base64. +2025-08-22 22:14:34 - INFO - [355c89f9-f5cd-4608-8f2b-edf7402759ae] Sending request to Ollama model 'openbmb/minicpm-v4:latest'... +2025-08-22 22:15:52 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK" +2025-08-22 22:15:52 - INFO - [355c89f9-f5cd-4608-8f2b-edf7402759ae] Received response from Ollama successfully. +2025-08-22 22:15:52 - INFO - [355c89f9-f5cd-4608-8f2b-edf7402759ae] Cleaned up temporary file: temp_videos/355c89f9-f5cd-4608-8f2b-edf7402759ae +2025-08-22 22:15:52 - INFO - [e9838f93-bfec-4b85-a433-2de8f8a2b506] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. 
Describe only what you can see', Video: '/mnt/data/xiuying/Code/test/videos/sample1_rotated_180.mp4' +2025-08-22 22:15:52 - INFO - [e9838f93-bfec-4b85-a433-2de8f8a2b506] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 22:15:53 - INFO - [e9838f93-bfec-4b85-a433-2de8f8a2b506] Extracted 30 frames successfully. Saving to temporary files... +2025-08-22 22:15:53 - INFO - [e9838f93-bfec-4b85-a433-2de8f8a2b506] Encoded 30 frames to Base64. +2025-08-22 22:15:53 - INFO - [e9838f93-bfec-4b85-a433-2de8f8a2b506] Sending request to Ollama model 'openbmb/minicpm-v4:latest'... +2025-08-22 22:16:03 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK" +2025-08-22 22:16:03 - INFO - [e9838f93-bfec-4b85-a433-2de8f8a2b506] Received response from Ollama successfully. +2025-08-22 22:16:03 - INFO - [e9838f93-bfec-4b85-a433-2de8f8a2b506] Cleaned up temporary file: temp_videos/e9838f93-bfec-4b85-a433-2de8f8a2b506 +2025-08-22 22:16:03 - INFO - [89779792-985e-40b9-9b3b-f638f7ebcc92] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/test/videos/sample1_rotated_270.mp4' +2025-08-22 22:16:03 - INFO - [89779792-985e-40b9-9b3b-f638f7ebcc92] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 22:16:04 - INFO - [89779792-985e-40b9-9b3b-f638f7ebcc92] Extracted 30 frames successfully. Saving to temporary files... +2025-08-22 22:16:04 - INFO - [89779792-985e-40b9-9b3b-f638f7ebcc92] Encoded 30 frames to Base64. +2025-08-22 22:16:04 - INFO - [89779792-985e-40b9-9b3b-f638f7ebcc92] Sending request to Ollama model 'openbmb/minicpm-v4:latest'... 
+2025-08-22 22:17:22 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK" +2025-08-22 22:17:22 - INFO - [89779792-985e-40b9-9b3b-f638f7ebcc92] Received response from Ollama successfully. +2025-08-22 22:17:22 - INFO - [89779792-985e-40b9-9b3b-f638f7ebcc92] Cleaned up temporary file: temp_videos/89779792-985e-40b9-9b3b-f638f7ebcc92 +2025-08-22 22:17:22 - INFO - [8b4e56c8-e2e6-4741-abd8-73c3f479eef9] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see', Video: '/mnt/data/xiuying/Code/test/videos/sample1_rotated_90.mp4' +2025-08-22 22:17:22 - INFO - [8b4e56c8-e2e6-4741-abd8-73c3f479eef9] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 22:17:22 - INFO - [8b4e56c8-e2e6-4741-abd8-73c3f479eef9] Extracted 30 frames successfully. Saving to temporary files... +2025-08-22 22:17:22 - INFO - [8b4e56c8-e2e6-4741-abd8-73c3f479eef9] Encoded 30 frames to Base64. +2025-08-22 22:17:22 - INFO - [8b4e56c8-e2e6-4741-abd8-73c3f479eef9] Sending request to Ollama model 'openbmb/minicpm-v4:latest'... +2025-08-22 22:17:32 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK" +2025-08-22 22:17:32 - INFO - [8b4e56c8-e2e6-4741-abd8-73c3f479eef9] Received response from Ollama successfully. +2025-08-22 22:17:32 - INFO - [8b4e56c8-e2e6-4741-abd8-73c3f479eef9] Cleaned up temporary file: temp_videos/8b4e56c8-e2e6-4741-abd8-73c3f479eef9 +2025-08-22 22:17:32 - INFO - [7a39d309-c2a4-443e-bba6-48a95430757d] Received new video inference request. Prompt: 'Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. 
Describe only what you can see', Video: '/mnt/data/xiuying/Code/test/videos/sample2.mp4' +2025-08-22 22:17:32 - INFO - [7a39d309-c2a4-443e-bba6-48a95430757d] Extracting frames using method: uniform, rate/threshold: 30 +2025-08-22 22:17:35 - INFO - [7a39d309-c2a4-443e-bba6-48a95430757d] Extracted 30 frames successfully. Saving to temporary files... +2025-08-22 22:17:35 - INFO - [7a39d309-c2a4-443e-bba6-48a95430757d] Encoded 30 frames to Base64. +2025-08-22 22:17:35 - INFO - [7a39d309-c2a4-443e-bba6-48a95430757d] Sending request to Ollama model 'openbmb/minicpm-v4:latest'... +2025-08-22 22:17:44 - INFO - HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK" +2025-08-22 22:17:44 - INFO - [7a39d309-c2a4-443e-bba6-48a95430757d] Received response from Ollama successfully. +2025-08-22 22:17:44 - INFO - [7a39d309-c2a4-443e-bba6-48a95430757d] Cleaned up temporary file: temp_videos/7a39d309-c2a4-443e-bba6-48a95430757d diff --git a/ollama_minicpm/logs/qwen2.5vl-int4:latest/20250822_214943.log b/ollama_minicpm/logs/qwen2.5vl-int4:latest/20250822_214943.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/ollama_minicpm/output_0823/LFM/20250822_220220/videos.json b/ollama_minicpm/output_0823/LFM/20250822_220220/videos.json new file mode 100644 index 0000000000000000000000000000000000000000..ed2f9c61313f9713fc7de7120c5ece5a63912ec2 --- /dev/null +++ b/ollama_minicpm/output_0823/LFM/20250822_220220/videos.json @@ -0,0 +1,72 @@ +{ + "sample1_raw.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + } + ], + "request_time": 0.013122797012329102 + }, + "sample1_rotated_180.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + } + ], + "request_time": 0.01103973388671875 + }, + "sample1_rotated_270.mp4": { + "detail": [ + { + "type": 
"missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + } + ], + "request_time": 0.010414838790893555 + }, + "sample1_rotated_90.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + } + ], + "request_time": 0.01027369499206543 + }, + "sample2.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + } + ], + "request_time": 0.010486602783203125 + } +} \ No newline at end of file diff --git a/ollama_minicpm/output_0823/LFM/20250822_220337/videos.json b/ollama_minicpm/output_0823/LFM/20250822_220337/videos.json new file mode 100644 index 0000000000000000000000000000000000000000..75f3fd7addb6217e4d76aad146ff79f841a37aaa --- /dev/null +++ b/ollama_minicpm/output_0823/LFM/20250822_220337/videos.json @@ -0,0 +1,72 @@ +{ + "sample1_raw.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + } + ], + "request_time": 0.01227426528930664 + }, + "sample1_rotated_180.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + } + ], + "request_time": 0.011414527893066406 + }, + "sample1_rotated_270.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + } + ], + "request_time": 0.01065683364868164 + }, + "sample1_rotated_90.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + } + ], + "request_time": 0.010521411895751953 + }, + "sample2.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + } + ], + "request_time": 0.0107269287109375 + } +} \ No newline at end of file diff --git 
a/ollama_minicpm/output_0823/LFM/20250822_220446/videos.json b/ollama_minicpm/output_0823/LFM/20250822_220446/videos.json new file mode 100644 index 0000000000000000000000000000000000000000..e954a6929ab9a8487fbee176ad1b66f4263fc0b6 --- /dev/null +++ b/ollama_minicpm/output_0823/LFM/20250822_220446/videos.json @@ -0,0 +1,132 @@ +{ + "sample1_raw.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.011782407760620117 + }, + "sample1_rotated_180.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.011652708053588867 + }, + "sample1_rotated_270.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.010704517364501953 + }, + "sample1_rotated_90.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.011075973510742188 + }, 
+ "sample2.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.010368824005126953 + } +} \ No newline at end of file diff --git a/ollama_minicpm/output_0823/LFM/20250822_220645/videos.json b/ollama_minicpm/output_0823/LFM/20250822_220645/videos.json new file mode 100644 index 0000000000000000000000000000000000000000..ac0a407afe4b79a2b604296979a83fac55f1065d --- /dev/null +++ b/ollama_minicpm/output_0823/LFM/20250822_220645/videos.json @@ -0,0 +1,132 @@ +{ + "sample1_raw.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.014278173446655273 + }, + "sample1_rotated_180.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.012834310531616211 + }, + "sample1_rotated_270.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + 
"request_time": 0.011817455291748047 + }, + "sample1_rotated_90.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.011409521102905273 + }, + "sample2.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.012181997299194336 + } +} \ No newline at end of file diff --git a/ollama_minicpm/output_0823/LFM/20250822_220803/videos.json b/ollama_minicpm/output_0823/LFM/20250822_220803/videos.json new file mode 100644 index 0000000000000000000000000000000000000000..b6168b9df0ff784aded04e064f0da07e11aeb056 --- /dev/null +++ b/ollama_minicpm/output_0823/LFM/20250822_220803/videos.json @@ -0,0 +1,132 @@ +{ + "sample1_raw.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.011797904968261719 + }, + "sample1_rotated_180.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 
'content_aware'" + } + } + ], + "request_time": 0.011256694793701172 + }, + "sample1_rotated_270.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.010596275329589844 + }, + "sample1_rotated_90.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.010581016540527344 + }, + "sample2.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.010172128677368164 + } +} \ No newline at end of file diff --git a/ollama_minicpm/output_0823/LFM/20250822_220858/videos.json b/ollama_minicpm/output_0823/LFM/20250822_220858/videos.json new file mode 100644 index 0000000000000000000000000000000000000000..a76291c8d552eb47372116d1c373398008449206 --- /dev/null +++ b/ollama_minicpm/output_0823/LFM/20250822_220858/videos.json @@ -0,0 +1,132 @@ +{ + "sample1_raw.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { 
+ "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.012153863906860352 + }, + "sample1_rotated_180.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.011232852935791016 + }, + "sample1_rotated_270.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.010737180709838867 + }, + "sample1_rotated_90.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.010680198669433594 + }, + "sample2.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + }, + { + "type": "enum", + "loc": [ + "body", + "sampling_method" + ], + "msg": "Input should be 'uniform' or 'content_aware'", + "input": "UNIFORM", + "ctx": { + "expected": "'uniform' or 'content_aware'" + } + } + ], + "request_time": 0.010709524154663086 + } +} \ No newline at end of file diff --git a/ollama_minicpm/output_0823/LFM/20250822_220947/videos.json b/ollama_minicpm/output_0823/LFM/20250822_220947/videos.json new file mode 100644 index 
0000000000000000000000000000000000000000..2c15592ac4d3e2029742fa71f8191d5af790789d --- /dev/null +++ b/ollama_minicpm/output_0823/LFM/20250822_220947/videos.json @@ -0,0 +1,72 @@ +{ + "sample1_raw.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + } + ], + "request_time": 0.012161731719970703 + }, + "sample1_rotated_180.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + } + ], + "request_time": 0.011288642883300781 + }, + "sample1_rotated_270.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + } + ], + "request_time": 0.01048135757446289 + }, + "sample1_rotated_90.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + } + ], + "request_time": 0.010743379592895508 + }, + "sample2.mp4": { + "detail": [ + { + "type": "missing", + "loc": [ + "body", + "video_path" + ], + "msg": "Field required", + "input": null + } + ], + "request_time": 0.010181427001953125 + } +} \ No newline at end of file diff --git a/ollama_minicpm/output_0823/LFM/20250822_221241/videos.json b/ollama_minicpm/output_0823/LFM/20250822_221241/videos.json new file mode 100644 index 0000000000000000000000000000000000000000..9aa996c7a99d029fa0b76a62711778ef030230bc --- /dev/null +++ b/ollama_minicpm/output_0823/LFM/20250822_221241/videos.json @@ -0,0 +1,22 @@ +{ + "sample1_raw.mp4": { + "detail": "An error occurred during processing: 503: Ollama inference failed: 3 validation errors for Image\nvalue.str\n Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list]\n For further information visit https://errors.pydantic.dev/2.11/v/string_type\nvalue.bytes\n Input should be a valid bytes [type=bytes_type, 
input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list]\n For further information visit https://errors.pydantic.dev/2.11/v/bytes_type\nvalue.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]]\n Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...CM90JJx+Qooop3Zz+yif/Z'], input_type=list]", + "request_time": 6.8742311000823975 + }, + "sample1_rotated_180.mp4": { + "detail": "An error occurred during processing: 503: Ollama inference failed: 3 validation errors for Image\nvalue.str\n Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list]\n For further information visit https://errors.pydantic.dev/2.11/v/string_type\nvalue.bytes\n Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list]\n For further information visit https://errors.pydantic.dev/2.11/v/bytes_type\nvalue.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]]\n Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...OSc96KKK6oYek4J2C7P//Z'], input_type=list]", + "request_time": 1.223984956741333 + }, + "sample1_rotated_270.mp4": { + "detail": "An error occurred during processing: 503: Ollama inference failed: 3 validation errors for Image\nvalue.str\n Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list]\n For further information visit https://errors.pydantic.dev/2.11/v/string_type\nvalue.bytes\n Input should be a valid 
bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list]\n For further information visit https://errors.pydantic.dev/2.11/v/bytes_type\nvalue.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]]\n Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...XyQcZJXqpooorI7YpWR//Z'], input_type=list]", + "request_time": 1.299971103668213 + }, + "sample1_rotated_90.mp4": { + "detail": "An error occurred during processing: 503: Ollama inference failed: 3 validation errors for Image\nvalue.str\n Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list]\n For further information visit https://errors.pydantic.dev/2.11/v/string_type\nvalue.bytes\n Input should be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list]\n For further information visit https://errors.pydantic.dev/2.11/v/bytes_type\nvalue.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]]\n Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...KKK8CUmpNHpRd4pn//2Q=='], input_type=list]", + "request_time": 1.251166820526123 + }, + "sample2.mp4": { + "detail": "An error occurred during processing: 503: Ollama inference failed: 3 validation errors for Image\nvalue.str\n Input should be a valid string [type=string_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list]\n For further information visit https://errors.pydantic.dev/2.11/v/string_type\nvalue.bytes\n Input should 
be a valid bytes [type=bytes_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list]\n For further information visit https://errors.pydantic.dev/2.11/v/bytes_type\nvalue.lax-or-strict[lax=union[json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]],function-after[path_validator(), str]],strict=json-or-python[json=function-after[path_validator(), str],python=is-instance[Path]]]\n Input is not a valid path for [type=path_type, input_value=['/9j/4AAQSkZJRgABAQAAAQA...FdLhG59BTjTlBNxX3H/9k='], input_type=list]", + "request_time": 4.366525173187256 + } +} \ No newline at end of file diff --git a/ollama_minicpm/output_0823/LFM/20250822_221429/videos.json b/ollama_minicpm/output_0823/LFM/20250822_221429/videos.json new file mode 100644 index 0000000000000000000000000000000000000000..d398cc21eba026a3d819b7931b94fd4e923f5c5b --- /dev/null +++ b/ollama_minicpm/output_0823/LFM/20250822_221429/videos.json @@ -0,0 +1,107 @@ +{ + "sample1_raw.mp4": { + "model": "openbmb/minicpm-v4:latest", + "created_at": "2025-08-22T22:15:52.633968237Z", + "done": true, + "done_reason": "stop", + "total_duration": 78335113827, + "load_duration": 24045126462, + "prompt_eval_count": 73, + "prompt_eval_duration": 9530881247, + "eval_count": 2990, + "eval_duration": 44664765695, + "message": { + "role": "assistant", + "content": "\nThe footage depicts a brief scene inside a small retail establishment, likely focusing on quick transactions due to its limited nature as inferred from typical characteristics associated with such businesses:\n\n1. 
**Initial Interaction**: The primary action involves an individual approaching or engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n2. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n3. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n4. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n5. 
**Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n6. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n7. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n8. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n9. 
**Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n10. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n11. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n12. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n13. 
**Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n14. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n15. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n16. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n17. 
**Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n18. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n19. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n20. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n21. 
**Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n22. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n23. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n24. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n25. 
**Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n26. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n27. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n28. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n29. 
**Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n30. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n31. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n32. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n33. 
**Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n34. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n35. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n36. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n37. 
**Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n38. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n39. **Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)\n\n40. 
**Counter Interaction**: The customer is seen engaging in some form of transaction at the counter within this compact space dedicated primarily for immediate purchases; it's evident that customers come here mainly seeking fast service before moving on elsewhere without delay if necessary based upon personal preferences regarding convenience factors involved amongst competing choices available among similar retailers nationwide across United States specifically:)", + "thinking": null, + "images": null, + "tool_name": null, + "tool_calls": null + }, + "request_time": 83.45417761802673 + }, + "sample1_rotated_180.mp4": { + "model": "openbmb/minicpm-v4:latest", + "created_at": "2025-08-22T22:16:03.402125371Z", + "done": true, + "done_reason": "stop", + "total_duration": 9793233459, + "load_duration": 77853807, + "prompt_eval_count": 73, + "prompt_eval_duration": 7738286972, + "eval_count": 143, + "eval_duration": 1939833160, + "message": { + "role": "assistant", + "content": "\nIn this one-minute footage from a convenience store, several notable activities are observed: A person is seen pushing a shopping cart containing various items including groceries like bread and vegetables such as cabbage or kale. Another individual appears to be reaching into the same area of shelves in search of something specific among an assortment of packaged goods that could range from personal care products, snacks, beverages to small electronics accessories etcetera indicating perhaps some daily essentials being stocked up before closing hours . 
Lastly , there's another person who seems engaged with customers at till counter dispensing cash or issuing receipt upon purchase completion hence highlighting seamless transaction process taking place inside convenience store premises during working timeframe", + "thinking": null, + "images": null, + "tool_name": null, + "tool_calls": null + }, + "request_time": 10.766891956329346 + }, + "sample1_rotated_270.mp4": { + "model": "openbmb/minicpm-v4:latest", + "created_at": "2025-08-22T22:17:22.076880468Z", + "done": true, + "done_reason": "stop", + "total_duration": 77720965850, + "load_duration": 76358929, + "prompt_eval_count": 73, + "prompt_eval_duration": 7735964833, + "eval_count": 4420, + "eval_duration": 69873338560, + "message": { + "role": "assistant", + "content": "\n1. A customer walks into a brightly lit, clean convenience store with shelves stocked neatly behind glass cases filled mostly with snacks like chips and candies as well as drinks such as sodas or bottled water among other things I cannot clearly identify from this distance due to lack of focus in cameras lens causing blurry vision making it hard for me to tell exactly what those items are so they just appear indistinct blobby masses behind glass rather than clear identifiable objects which would allow customers like myself who frequent convenience stores on a regular basis such as daily after work or during lunch breaks etc., thus requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes 
but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned 
above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available 
immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much 
fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer 
periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there 
for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like 
those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate 
solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick 
access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might 
have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail 
establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they 
need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail establishments aren't there for leisure purposes but more likely due necessity 
demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail stores aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail stores aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail stores aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail stores aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access 
without much fuss since most people visiting these types of retail stores aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail stores aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much fuss since most people visiting these types of retail stores aren't there for leisure purposes but more likely due necessity demands needing immediate solutions available immediately right away unlike other places where one might have to wait longer periods times before getting whatever it is that they need especially in cases like those mentioned above which involve emergency situations requiring quick access without much f", + "thinking": null, + "images": null, + "tool_name": null, + "tool_calls": null + }, + "request_time": 78.67481827735901 + }, + "sample1_rotated_90.mp4": { + "model": "openbmb/minicpm-v4:latest", + "created_at": "2025-08-22T22:17:32.324124571Z", + "done": true, + "done_reason": "stop", + "total_duration": 9294020303, + "load_duration": 77716445, + "prompt_eval_count": 73, + "prompt_eval_duration": 7755943099, + "eval_count": 105, + "eval_duration": 1425895890, + "message": { + "role": "assistant", + "content": "\nIn this brief surveillance footage, a person is seen walking into a small establishment with glass doors, possibly indicating that it's an entrance to something like a bar or 
restaurant area within another location (e.g., mall). The individual appears focused on reaching their destination quickly; they walk briskly towards the doorway and enter through one of its transparent panels before closing them behind her back out from sight into whatever space lies beyond that entry point in line with what's visible around here right now\u2026", + "thinking": null, + "images": null, + "tool_name": null, + "tool_calls": null + }, + "request_time": 10.245770692825317 + }, + "sample2.mp4": { + "model": "openbmb/minicpm-v4:latest", + "created_at": "2025-08-22T22:17:44.931945234Z", + "done": true, + "done_reason": "stop", + "total_duration": 9173121311, + "load_duration": 72262073, + "prompt_eval_count": 73, + "prompt_eval_duration": 7851252041, + "eval_count": 89, + "eval_duration": 1212567852, + "message": { + "role": "assistant", + "content": "\nA customer walks into a small, brightly lit convenience store with white walls and minimal decor. The individual is dressed casually and appears to be focused as they approach the counter in search of items or assistance from staff members behind it who are engaged on various tasks such as processing transactions using cash registers among others which suggests that these individuals play integral roles within this establishment hence contributing towards its smooth operation therefore making them an essential part thereof.", + "thinking": null, + "images": null, + "tool_name": null, + "tool_calls": null + }, + "request_time": 12.607224225997925 + } +} \ No newline at end of file diff --git a/ollama_minicpm/test.py b/ollama_minicpm/test.py new file mode 100644 index 0000000000000000000000000000000000000000..dde787e77bb1aae27c5309300ca744743713ddd8 --- /dev/null +++ b/ollama_minicpm/test.py @@ -0,0 +1,54 @@ +import os +import subprocess +import sys +import argparse +import json +import time +from tqdm import tqdm +output_dir = "output_0823" + +parser = argparse.ArgumentParser() 
+parser.add_argument("--model", type=str, default="LFM") +args = parser.parse_args() +output_dir = os.path.join(output_dir, args.model) +os.makedirs(output_dir, exist_ok=True) +VIDEO_FILE_DIR = "/mnt/data/xiuying/Code/test/videos" + +# API服务器的URL +API_URL = "http://127.0.0.1:8008/video-inference/" + + +PROMPT = "Summarize the key observable events in this 1-minute convenience store video clip. Focus strictly on the physical actions and interactions of the people. Describe only what you can see; do not interpret intentions, relationships, or work efficiency. Avoid all repetitive descriptions of the store's layout or shelves." + +files = os.listdir(VIDEO_FILE_DIR) +files.sort() +total_output = {} +cur_time = time.strftime("%Y%m%d_%H%M%S", time.localtime()) +output_file_path = os.path.join(output_dir, cur_time, f"{VIDEO_FILE_DIR.split('/')[-1]}.json") +os.makedirs(os.path.join(output_dir, cur_time), exist_ok=True) +for file in tqdm(files): + video_file_path = os.path.join(VIDEO_FILE_DIR, file) + start_time = time.time() + command = ( + f"curl -v -X POST '{API_URL}' " + f"-F \"prompt={PROMPT}\" " + f"-F \"video_path={video_file_path}\" " + f"-F \"sampling_method=UNIFORM\" " + f"-F \"sampling_rate=30\" " + ) + + print("将要执行以下 cURL 命令:") + print("---------------------------------") + print(command) + print("---------------------------------") + print("\n正在执行...\n") + + return_result = subprocess.check_output(command, shell=True) + response = json.loads(return_result) + total_output[file] = response + end_time = time.time() + total_output[file]["request_time"] = end_time - start_time + with open(output_file_path, "w") as f: + json.dump(total_output, f, indent=4) + +print("\n\n✅ 测试脚本执行完毕。") diff --git a/ollama_minicpm/video_processor.py b/ollama_minicpm/video_processor.py new file mode 100644 index 0000000000000000000000000000000000000000..cba27cea181d9264edbfcfa7020f896d2c097c0e --- /dev/null +++ b/ollama_minicpm/video_processor.py @@ -0,0 +1,124 @@ +import cv2 +import 
numpy as np +import base64 +from typing import List +from enum import Enum +from skimage.metrics import structural_similarity as ssim + +class FrameSamplingMethod(str, Enum): + UNIFORM = "uniform" + CONTENT_AWARE = "content_aware" + +def extract_frames( + video_path: str, + method: FrameSamplingMethod, + sampling_rate: int +) -> List[np.ndarray]: + """ + 从视频中提取帧。 + 对于UNIFORM方法,sampling_rate表示要提取的总帧数。 + 对于CONTENT_AWARE方法,sampling_rate现在也表示要提取的总帧数,但会选择变化最大的帧。 + """ + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + raise IOError(f"Cannot open video file: {video_path}") + + frames = [] + total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) + target_size = (420, 280) # (width, height) + + if method == FrameSamplingMethod.UNIFORM: + if sampling_rate <= 0: + cap.release() + return [] + + # 如果请求的帧数大于总帧数,则返回所有帧 + if sampling_rate >= total_frames: + while True: + ret, frame = cap.read() + if not ret: + break + resized_frame = cv2.resize(frame, target_size) + frames.append(resized_frame) + cap.release() + return frames + + # 计算采样间隔 + step = total_frames / sampling_rate + for i in range(sampling_rate): + frame_index = int(i * step) + cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index) + ret, frame = cap.read() + if ret: + resized_frame = cv2.resize(frame, target_size) + frames.append(resized_frame) + + elif method == FrameSamplingMethod.CONTENT_AWARE: + if sampling_rate <= 0: + cap.release() + return [] + + # 如果视频总帧数少于或等于请求的帧数,则返回所有帧 + if total_frames <= sampling_rate: + while True: + ret, frame = cap.read() + if not ret: + break + resized_frame = cv2.resize(frame, target_size) + frames.append(resized_frame) + cap.release() + return frames + + # --- Pass 1: 计算所有相邻帧的SSIM分数 --- + ssim_scores = [] + cap.set(cv2.CAP_PROP_POS_FRAMES, 0) + ret, prev_frame = cap.read() + if not ret: + cap.release() + return [] + + prev_frame_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY) + + for i in range(1, total_frames): + ret, current_frame = cap.read() + if not ret: + break + + 
current_frame_gray = cv2.cvtColor(current_frame, cv2.COLOR_BGR2GRAY) + score, _ = ssim(prev_frame_gray, current_frame_gray, full=True) + ssim_scores.append((score, i)) # 存储(ssim_score, frame_index) + prev_frame_gray = current_frame_gray + + # --- 选择变化最大的 n-1 帧 --- + # 按SSIM分数升序排序 (分数越低,差异越大) + ssim_scores.sort(key=lambda x: x[0]) + + # 选择分数最低的 n-1 帧的索引 + selected_indices = {score[1] for score in ssim_scores[:sampling_rate - 1]} + # 始终包括第一帧 (index 0) + selected_indices.add(0) + + # --- Pass 2: 根据索引提取帧 --- + sorted_indices = sorted(list(selected_indices)) + for idx in sorted_indices: + cap.set(cv2.CAP_PROP_POS_FRAMES, idx) + ret, frame = cap.read() + if ret: + resized_frame = cv2.resize(frame, target_size) + frames.append(resized_frame) + + cap.release() + return frames + +def encode_frames_to_base64(frames: List[np.ndarray]) -> List[str]: + """ + 将OpenCV帧列表编码为base64字符串列表。 + """ + base64_frames = [] + for frame in frames: + # 将帧编码为JPEG格式 + _, buffer = cv2.imencode('.jpg', frame) + # 将缓冲区字节转换为base64字符串 + base64_str = base64.b64encode(buffer).decode('utf-8') + base64_frames.append(base64_str) + return base64_frames \ No newline at end of file diff --git a/vllm-deploy/=0.46.1 b/vllm-deploy/=0.46.1 new file mode 100644 index 0000000000000000000000000000000000000000..e803fae6780b4a7837f5189a790602ae9c935b0c --- /dev/null +++ b/vllm-deploy/=0.46.1 @@ -0,0 +1,24 @@ +Requirement already satisfied: bitsandbytes in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (0.47.0) +Requirement already satisfied: torch<3,>=2.2 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from bitsandbytes) (2.7.1+cu118) +Requirement already satisfied: numpy>=1.17 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from bitsandbytes) (2.2.6) +Requirement already satisfied: filelock in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (3.19.1) +Requirement already satisfied: typing-extensions>=4.10.0 in 
/home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (4.12.2) +Requirement already satisfied: setuptools in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (78.1.1) +Requirement already satisfied: sympy>=1.13.3 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (1.13.3) +Requirement already satisfied: networkx in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (3.3) +Requirement already satisfied: jinja2 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (3.1.6) +Requirement already satisfied: fsspec in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (2024.6.1) +Requirement already satisfied: nvidia-cuda-nvrtc-cu11==11.8.89 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (11.8.89) +Requirement already satisfied: nvidia-cuda-runtime-cu11==11.8.89 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (11.8.89) +Requirement already satisfied: nvidia-cuda-cupti-cu11==11.8.87 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (11.8.87) +Requirement already satisfied: nvidia-cudnn-cu11==9.1.0.70 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (9.1.0.70) +Requirement already satisfied: nvidia-cublas-cu11==11.11.3.6 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (11.11.3.6) +Requirement already satisfied: nvidia-cufft-cu11==10.9.0.58 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (10.9.0.58) +Requirement already satisfied: nvidia-curand-cu11==10.3.0.86 in 
/home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (10.3.0.86) +Requirement already satisfied: nvidia-cusolver-cu11==11.4.1.48 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (11.4.1.48) +Requirement already satisfied: nvidia-cusparse-cu11==11.7.5.86 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (11.7.5.86) +Requirement already satisfied: nvidia-nccl-cu11==2.21.5 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (2.21.5) +Requirement already satisfied: nvidia-nvtx-cu11==11.8.86 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (11.8.86) +Requirement already satisfied: triton==3.3.1 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from torch<3,>=2.2->bitsandbytes) (3.3.1) +Requirement already satisfied: mpmath<1.4,>=1.1.0 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from sympy>=1.13.3->torch<3,>=2.2->bitsandbytes) (1.3.0) +Requirement already satisfied: MarkupSafe>=2.0 in /home/xiuying/miniconda3/envs/vllm/lib/python3.12/site-packages (from jinja2->torch<3,>=2.2->bitsandbytes) (2.1.5) diff --git a/vllm-deploy/MiniCPM-V-4-Q4_K_M.gguf b/vllm-deploy/MiniCPM-V-4-Q4_K_M.gguf new file mode 100644 index 0000000000000000000000000000000000000000..243462e0c764206e6582ebac626de8fa5df73f92 --- /dev/null +++ b/vllm-deploy/MiniCPM-V-4-Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48f384d199d0015e9d562577bc334c2e2444efa7e7951c92bb04cf70d2ebb185 +size 2189861216 diff --git a/vllm-deploy/gguf_template.py b/vllm-deploy/gguf_template.py new file mode 100644 index 0000000000000000000000000000000000000000..24d3137e763601e92ffa08a6de9d30344685a196 --- /dev/null +++ b/vllm-deploy/gguf_template.py @@ -0,0 +1,39 @@ +from vllm import LLM, SamplingParams + 
# In this script, we demonstrate how to pass input to the chat method:
conversation = [
    {
        "role": "system",
        "content": "You are a helpful assistant"
    },
    {
        "role": "user",
        "content": "Hello"
    },
    {
        "role": "assistant",
        "content": "Hello! How can I assist you today?"
    },
    {
        "role": "user",
        "content": "Write an essay about the importance of higher education.",
    },
]

# Checkpoint used when GGUF_MODEL_PATH is not set. This is the path the
# script was originally written against; set the environment variable to run
# it on another machine without editing the file.
DEFAULT_MODEL_PATH = (
    "/mnt/data/xiuying/Code/vllm-deploy/MiniCPM-V-4-Q4_K_M.gguf")


def main() -> None:
    """Run one chat completion against the GGUF checkpoint and print it."""
    # Local import keeps this block self-contained; the vLLM names (`LLM`,
    # `SamplingParams`) come from the import at the top of the script.
    import os

    # Create a sampling params object.
    sampling_params = SamplingParams(temperature=0.8, top_p=0.95)

    # Create an LLM.
    llm = LLM(
        model=os.environ.get("GGUF_MODEL_PATH", DEFAULT_MODEL_PATH),
        tokenizer="openbmb/MiniCPM-V-4",
        trust_remote_code=True,
    )

    # Generate texts from the prompts. The output is a list of RequestOutput
    # objects that contain the prompt, generated text, and other information.
    outputs = llm.chat(conversation, sampling_params)

    # Print the outputs.
    for output in outputs:
        prompt = output.prompt
        generated_text = output.outputs[0].text
        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")


# Building the engine at import time is unsafe when worker processes are
# started with the `spawn` method, because each worker re-imports this module.
# Guarding the entry point keeps imports side-effect free.
if __name__ == "__main__":
    main()
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Inference-only MiniCPM-V model compatible with HuggingFace weights.""" +import math +from collections import defaultdict +from collections.abc import Iterable, Mapping, Sequence +from functools import partial +from typing import Annotated, Any, Callable, Literal, Optional, Union + +import numpy as np +import torch +import torch.types +from torch import nn +from transformers import BatchFeature, PretrainedConfig +from typing_extensions import TypeVar + +from vllm.config import VllmConfig +from vllm.model_executor.layers.quantization import QuantizationConfig +from vllm.model_executor.layers.quantization.awq import AWQConfig +from vllm.model_executor.layers.quantization.awq_marlin import AWQMarlinConfig +from vllm.model_executor.layers.resampler import (BaseResampler, Resampler2, + get_2d_sincos_pos_embed) +from vllm.model_executor.model_loader.utils import set_default_torch_dtype +from vllm.model_executor.models.llama import LlamaForCausalLM +from vllm.model_executor.models.minicpm import MiniCPMForCausalLM +from vllm.model_executor.models.module_mapping import MultiModelKeys +from vllm.model_executor.models.qwen2 import Qwen2ForCausalLM +from vllm.model_executor.sampling_metadata import SamplingMetadata +from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalKwargsItems +from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, + NestedTensors) +from vllm.multimodal.parse import 
(DictEmbeddingItems, ImageItem, + ImageProcessorItems, ImageSize, + ModalityData, ModalityDataItems, + MultiModalDataItems, MultiModalDataParser, + VideoItem, VideoProcessorItems) +from vllm.multimodal.processing import (BaseMultiModalProcessor, + BaseProcessingInfo, PromptReplacement, + PromptUpdate, PromptUpdateDetails) +from vllm.multimodal.profiling import BaseDummyInputsBuilder +from vllm.platforms import current_platform +from vllm.sequence import IntermediateTensors +from vllm.utils import flatten_2d_lists +from vllm.utils.tensor_schema import TensorSchema, TensorShape + +from .idefics2_vision_model import Idefics2VisionTransformer +from .interfaces import (MultiModalEmbeddings, SupportsLoRA, + SupportsMultiModal, SupportsPP) +from .utils import (AutoWeightsLoader, flatten_bn, maybe_prefix, + merge_multimodal_embeddings) + +# For profile run +_MAX_FRAMES_PER_VIDEO = 16 + + +class MiniCPMVImagePixelInputs(TensorSchema): + """ + Dimensions: + - bns: Batch size * number of images * number of slices + - bn: Batch size * number of images + - c: Number of channels + - h: Height + - w: Width + """ + + type: Literal["pixel_values"] = "pixel_values" + + # Note that the image size may vary, so we pass it as a list instead of a + # batched tensor. + pixel_values: Annotated[ + list[torch.Tensor], + TensorShape("bns", "c", "h", "w", dynamic_dims={"h", "w"}), + ] + tgt_sizes: Annotated[ + torch.Tensor, + TensorShape("bns", 2), # This should be in `(height, width)` format. 
def _adjust_pos_cache(self, tgt_sizes: torch.Tensor,
                      device: torch.types.Device) -> None:
    """Grow the cached 2D position embedding if any target size exceeds it.

    Args:
        tgt_sizes: Tensor whose first two columns are read as per-item
            ``(height, width)`` patch-grid sizes.
        device: Device on which a rebuilt cache is placed.

    The cache only ever grows: each dimension of ``self.max_size`` becomes
    the maximum of its current value and the largest requested value.
    """
    # One reduction and one host transfer instead of two separate
    # `.max().item()` calls (each of which synchronises with the device).
    max_h, max_w = tgt_sizes[:, :2].amax(dim=0).tolist()
    assert isinstance(max_h, int) and isinstance(max_w, int)

    cur_h, cur_w = self.max_size
    if max_h > cur_h or max_w > cur_w:
        self.max_size = (max(max_h, cur_h), max(max_w, cur_w))
        self._set_2d_pos_cache(self.max_size, device)
def get_version_by_config(config: PretrainedConfig) -> tuple[int, ...]:
    """Return the MiniCPM-V version declared by ``config`` as an int tuple.

    Args:
        config: Model config. ``config.version`` is read when present;
            otherwise ``hidden_size`` and ``query_num`` are used to tell the
            two unversioned releases apart.

    Returns:
        A tuple with at least two components, e.g. ``(2, 6)`` or ``(4, 0)``.
    """
    version = getattr(config, "version", None)

    # The old configs do not include version number
    # TODO: Remove this after the HF repos are updated
    if version is None:
        if config.hidden_size == 2304 and config.query_num == 64:
            return (2, 0)
        return (2, 5)

    parts = tuple(int(x) for x in str(version).split("."))
    # A version written without a minor component (e.g. the integer 4)
    # would otherwise become `(4,)`, which never equals the two-component
    # tuples this file compares against, such as `(4, 0)`.
    if len(parts) == 1:
        parts += (0, )
    return parts
class MiniCPMVVideoEmbeddingItems(DictEmbeddingItems):
    """Video items supplied as a dict of precomputed tensors.

    The dict must provide ``video_embeds`` and ``video_image_sizes``.
    """

    def __init__(
        self,
        data: Mapping[str, torch.Tensor],
        fields_factory: Callable[
            [Mapping[str, torch.Tensor]],
            Mapping[str, MultiModalFieldConfig],
        ],
    ) -> None:
        required = {"video_embeds", "video_image_sizes"}
        super().__init__(
            data,
            modality="video",
            required_fields=required,
            fields_factory=fields_factory,
        )

    def get_frame_size(self, index: int) -> ImageSize:
        """Build an ``ImageSize`` from the first two size entries of a video."""
        # NOTE(review): this reads entries [0] and [1] as width and height,
        # while `get_num_frames` takes `len()` of the same per-video value as
        # the frame count. Both cannot hold for one layout; confirm the
        # expected shape of `video_image_sizes` with the callers.
        sizes = self.get(index)["video_image_sizes"].tolist()
        width, height = sizes[0], sizes[1]
        return ImageSize(width=width, height=height)

    def get_num_frames(self, index: int) -> int:
        """Return the length of the ``video_image_sizes`` entry of a video."""
        sizes = self.get(index)["video_image_sizes"]
        return len(sizes)
def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
    """Return the per-modality item limits supported by this model version.

    Images are always supported. Video is added only for versions
    ``(2, 6)`` and ``(4, 0)``. A limit of ``None`` is the value stored for
    every supported modality.
    """
    mm_limits: dict[str, Optional[int]] = {"image": None}
    # Query the version once and use a membership test instead of two
    # chained equality checks that each re-query it.
    if self.get_model_version() in ((2, 6), (4, 0)):
        mm_limits["video"] = None

    return mm_limits
def get_sliced_grid(
    self,
    image_size: ImageSize,
    # For MiniCPM V/O 2.6
    max_slice_nums: Optional[int] = None,
) -> Optional[tuple[int, int]]:
    """Ask the HF image processor for the slice grid of ``image_size``.

    Versions ``(2, 0)`` and ``(2, 5)`` call the processor without a slice
    budget. Other versions pass ``max_slice_nums``, falling back to the
    processor's own ``max_slice_nums`` when the argument is ``None``.
    """
    processor = self.get_image_processor()

    if self.get_model_version() in ((2, 0), (2, 5)):
        return processor.get_sliced_grid(image_size)

    slice_budget = (processor.max_slice_nums
                    if max_slice_nums is None else max_slice_nums)
    return processor.get_sliced_grid(
        image_size,
        max_slice_nums=slice_budget,
    )
def get_max_video_frames(self, max_tokens: int) -> int:
    """Return how many video frames fit into a budget of ``max_tokens``.

    Args:
        max_tokens: Token budget available for video frames. It may be
            zero or negative when other inputs already use the whole
            sequence length.

    Returns:
        The number of whole frames that fit; never negative.
    """
    num_frame_tokens = self.get_max_video_frame_tokens()
    # Floor division of a negative budget would give a negative frame
    # count, so clamp the budget at zero first.
    return max(max_tokens, 0) // num_frame_tokens
def process_images(
    self,
    mm_data: Mapping[str, object],
    mm_kwargs: Mapping[str, object],
    tok_kwargs: Mapping[str, object],
) -> Mapping[str, NestedTensors]:
    """Turn the ``images`` entry of ``mm_data`` into model inputs.

    Returns an empty dict when ``mm_data`` has no ``images`` entry.
    Otherwise the result holds ``image_token_id`` plus, for raw images
    (as opposed to precomputed embeddings), the ``pixel_values``,
    ``image_sizes`` and ``tgt_sizes`` produced by the HF processor.
    """
    if (images := mm_data.get("images")) is None:
        return {}

    parsed_images = (self._get_data_parser().parse_mm_data({
        "image": images
    }).get_items("image",
                 (MiniCPMVImageEmbeddingItems, ImageProcessorItems)))

    if isinstance(parsed_images, MiniCPMVImageEmbeddingItems):
        # Precomputed embeddings carry no pixels to process.
        image_inputs = {}
    else:
        # One prompt and one single-image list per image.
        image_inputs = self._base_call_hf_processor(
            prompts=[self.info.image_pattern] * len(parsed_images),
            mm_data={"images": [[image] for image in parsed_images]},
            mm_kwargs=mm_kwargs,
            tok_kwargs=tok_kwargs,
            out_keys={"pixel_values", "image_sizes", "tgt_sizes"},
        )

    tokenizer = self.info.get_tokenizer()
    # The lookup key had been reduced to an empty string, which is not a
    # vocabulary entry; the unknown token is spelled "<unk>".
    unk_token_id = tokenizer.get_vocab()["<unk>"]
    image_inputs["image_token_id"] = torch.tensor(unk_token_id)

    return image_inputs
def _base_call_hf_processor(
    self,
    prompts: list[str],
    mm_data: Mapping[str, Sequence[object]],
    mm_kwargs: Mapping[str, object],
    tok_kwargs: Mapping[str, object],
    *,
    out_keys: set[str],
) -> dict[str, NestedTensors]:
    """Run the parent HF-processor call over ``prompts`` and keep ``out_keys``.

    Versions ``(2, 6)`` and ``(4, 0)`` receive all prompts and data in one
    call. Other versions are called once per prompt with the i-th element
    of every ``mm_data`` value, and the single result of each call is
    appended to a per-key list.
    """
    if self.info.get_model_version() in ((2, 6), (4, 0)):
        # This processor supports zipping prompt and mm_data together
        batched = super()._call_hf_processor(
            prompt=prompts,  # type: ignore
            mm_data=mm_data,
            mm_kwargs=mm_kwargs,
            tok_kwargs=tok_kwargs,
        )
        return {k: batched[k] for k in out_keys}

    collected = defaultdict[str, list[torch.Tensor]](list)
    for idx, single_prompt in enumerate(prompts):
        item_data = {name: values[idx] for name, values in mm_data.items()}
        single = super()._call_hf_processor(
            prompt=single_prompt,
            mm_data=item_data,
            mm_kwargs=mm_kwargs,
            tok_kwargs=tok_kwargs,
        )

        for key, value in single.items():
            # Each per-prompt call must yield exactly one entry per key.
            assert len(value) == 1, (key, len(value))
            collected[key].append(value[0])

    return {k: collected[k] for k in out_keys}
def _get_prompt_updates(
    self,
    mm_items: MultiModalDataItems,
    hf_processor_mm_kwargs: Mapping[str, object],
    out_mm_kwargs: MultiModalKwargsItems,
) -> Sequence[PromptUpdate]:
    """Build the prompt replacements for image and video placeholders.

    Each placeholder pattern is replaced by the slice placeholder text for
    the matching item. If a pattern changes when encoded and decoded by
    the tokenizer, the decoded form is registered as an extra target.
    """
    placeholders = [("image", self.info.image_pattern),
                    ("video", self.info.video_pattern)]

    # hard code for inconsistency of encode-decode image_pattern
    tokenizer = self.info.get_tokenizer()
    additional_placeholders = []
    for modality, pattern in placeholders:
        sub_pattern = tokenizer.decode(
            tokenizer.encode(pattern, add_special_tokens=False))
        if sub_pattern != pattern:
            additional_placeholders.append((modality, sub_pattern))
    placeholders += additional_placeholders

    def get_image_replacement(item_idx: int):
        images = mm_items.get_items(
            "image", (MiniCPMVImageEmbeddingItems, ImageProcessorItems))

        image_size = images.get_image_size(item_idx)

        # The selected text had been reduced to an empty string; the
        # unknown token is spelled "<unk>".
        return PromptUpdateDetails.select_text(
            self.get_image_prompt_texts(image_size, item_idx),
            "<unk>",
        )

    def get_video_replacement(item_idx: int):
        videos = mm_items.get_items(
            "video", (MiniCPMVVideoEmbeddingItems, VideoProcessorItems))

        frame_size = videos.get_frame_size(item_idx)
        num_frames = videos.get_num_frames(item_idx)

        return PromptUpdateDetails.select_text(
            self.get_video_prompt_texts(frame_size, num_frames),
            "<unk>",
        )

    get_replacement = {
        "image": get_image_replacement,
        "video": get_video_replacement,
    }

    return [
        PromptReplacement(modality=modality,
                          target=pattern,
                          replacement=get_replacement[modality])
        for modality, pattern in placeholders
    ]
def _parse_and_validate_vision_input(
    self,
    modality: str,
    **kwargs: object,
) -> Optional[MiniCPMVImageInputs]:
    """Validate raw vision kwargs and wrap them in a typed input object.

    Args:
        modality: Label used only in error messages.
        **kwargs: May contain ``pixel_values``, ``image_embeds``,
            ``image_token_id`` and ``tgt_sizes``.

    Returns:
        ``None`` when neither ``pixel_values`` nor ``image_embeds`` is
        given; otherwise an embedding-input object if ``image_embeds`` is
        given, else a pixel-input object.

    Raises:
        ValueError: If ``image_embeds``, ``pixel_values`` or ``tgt_sizes``
            is neither a tensor nor a list.
    """
    pixel_values = kwargs.pop("pixel_values", None)
    image_embeds = kwargs.pop("image_embeds", None)

    if pixel_values is None and image_embeds is None:
        return None

    # Pop with a default: the None check below shows the id is optional,
    # and a bare pop() would raise KeyError before the type validation
    # could report a meaningful error.
    image_token_id = kwargs.pop("image_token_id", None)
    if image_token_id is not None:
        assert isinstance(image_token_id, torch.Tensor)
        # Record the placeholder token id for later embedding merging.
        self.mm_token_ids.add(image_token_id.flatten().unique().item())

    if image_embeds is not None:
        if not isinstance(image_embeds, (torch.Tensor, list)):
            raise ValueError(
                f"Incorrect type of image_embeds for {modality=}. "
                f"Got type: {type(image_embeds)}")

        image_embeds_flat = flatten_bn(image_embeds)

        return MiniCPMVImageEmbeddingInputs(
            type="image_embeds",
            image_embeds=image_embeds_flat,
        )

    if not isinstance(pixel_values, (torch.Tensor, list)):
        raise ValueError(
            f"Incorrect type of pixel_values for {modality=}. "
            f"Got type: {type(pixel_values)}")

    tgt_sizes = kwargs.pop("tgt_sizes")
    if not isinstance(tgt_sizes, (torch.Tensor, list)):
        raise ValueError(f"Incorrect type of tgt_sizes for {modality=}. "
                         f"Got type: {type(tgt_sizes)}")

    # Slice count per image, taken before the nesting is flattened.
    num_slices = [[len(p) for p in ps] for ps in pixel_values]
    num_slices_flat = flatten_bn(torch.tensor(num_slices))

    pixel_values_flat = flatten_bn(flatten_2d_lists(pixel_values))
    tgt_sizes_flat = flatten_bn(flatten_2d_lists(tgt_sizes), concat=True)

    return MiniCPMVImagePixelInputs(
        type="pixel_values",
        pixel_values=pixel_values_flat,
        tgt_sizes=tgt_sizes_flat,
        num_slices=num_slices_flat,
    )
def _process_multimodal_inputs(self, modalities: dict):
    """Compute embeddings for every parsed modality, in dict order.

    Args:
        modalities: Maps ``"images"`` and/or ``"videos"`` to parsed vision
            inputs. Other keys are ignored.

    Returns:
        A tuple of tensors, each corresponding to one multimodal data item
        (image or video).
    """
    embeddings: list[torch.Tensor] = []

    # NOTE: It is important to iterate over the keys in this dictionary
    # to preserve the order of the modalities.
    for modality, vision_input in modalities.items():
        # Images and videos go through the same vision path.
        if modality in ("images", "videos"):
            embeddings.extend(self._process_vision_input(vision_input))

    return tuple(embeddings)
def init_vision_module(
    self,
    config: PretrainedConfig,
    quant_config: Optional[QuantizationConfig],
    prefix: str = "",
) -> nn.Module:
    """Create the timm SigLIP vision tower used by MiniCPM-V 2.0.

    The model is created under a float16 default dtype and then cast to
    the current default dtype. Its attention pooling head, if present, is
    replaced by an identity, and the last transformer block is dropped
    when ``self.config.drop_vision_last_layer`` is set.

    Raises:
        ImportError: If ``timm`` cannot be imported.
    """
    # TODO: refactor vision model through timm wrapper from transformers
    try:
        import timm
    except ImportError as exc:
        # Chain from the caught exception, not from the ImportError class,
        # so the real reason for the failed import stays in the traceback.
        raise ImportError("Please install timm==0.9.10") from exc

    with set_default_torch_dtype(torch.float16):
        model = timm.create_model(
            "vit_so400m_patch14_siglip_384.webli",
            pretrained=False,
            num_classes=0,
            dynamic_img_size=True,
            dynamic_img_pad=True,
        )

    model = model.to(dtype=torch.get_default_dtype())

    if (isinstance(model, timm.models.VisionTransformer)
            and model.attn_pool is not None):
        model.attn_pool = torch.nn.Identity()

    if self.config.drop_vision_last_layer:
        model.blocks = model.blocks[:-1]

    return model
+class MiniCPMV2_5(MiniCPMVBaseModel, SupportsLoRA): + packed_modules_mapping = { + "qkv_proj": [ + "q_proj", + "k_proj", + "v_proj", + ], + "gate_up_proj": [ + "gate_proj", + "up_proj", + ], + } + + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super().__init__(vllm_config=vllm_config, prefix=prefix) + assert self.version == (2, 5) + + def init_llm( + self, + vllm_config: VllmConfig, + prefix: str = "", + ) -> nn.Module: + return LlamaForCausalLM(vllm_config=vllm_config, prefix=prefix) + + def init_vision_module( + self, + config: PretrainedConfig, + quant_config: Optional[QuantizationConfig], + prefix: str = "", + ) -> nn.Module: + model = Idefics2VisionTransformer(config.vision_config, + quant_config=quant_config, + prefix=prefix) + if self.config.drop_vision_last_layer: + model.encoder.layers = model.encoder.layers[:-1] + return model + + def init_resampler(self, + embed_dim: int, + vision_dim: int, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "") -> nn.Module: + with set_default_torch_dtype(torch.float16): + resampler = Resampler2_5(num_queries=self.config.query_num, + embed_dim=embed_dim, + num_heads=embed_dim // 128, + kv_dim=vision_dim, + quant_config=quant_config, + prefix=prefix) + + return resampler.to(device=current_platform.device_type, + dtype=torch.get_default_dtype()) + + def get_vision_hidden_states( + self, data: MiniCPMVImagePixelInputs) -> torch.Tensor: + pixel_values = data["pixel_values"] + tgt_sizes = data["tgt_sizes"] + + B = len(pixel_values) + P = pixel_values[0].shape[-2] + L = max(item.shape[-1] for item in pixel_values) + device = pixel_values[0].device + dtype = pixel_values[0].dtype + + all_pixel_values = torch.zeros((B, 3, P, L), + dtype=dtype, + device=device) + for i, pixel_values_item in enumerate(pixel_values): + L_item = pixel_values_item.shape[-1] + all_pixel_values[i, ..., :L_item] = pixel_values_item + + num_patches = tgt_sizes.prod(-1) + max_patches = num_patches.max().item() + 
assert isinstance(max_patches, int) + + patch_attn_mask = torch.zeros((B, max_patches), + dtype=torch.bool, + device=device) + for i, num_patches_item in enumerate(num_patches): + patch_attn_mask[i, :num_patches_item] = True + + vision_embedding = self.vpm( + all_pixel_values, + patch_attention_mask=patch_attn_mask.unsqueeze(1), + tgt_sizes=None, + ) + + return self.resampler(vision_embedding, tgt_sizes) + + +class MiniCPMV2_6(MiniCPMVBaseModel, SupportsLoRA): + packed_modules_mapping = { + "qkv_proj": [ + "q_proj", + "k_proj", + "v_proj", + ], + "gate_up_proj": [ + "gate_proj", + "up_proj", + ], + } + + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super().__init__(vllm_config=vllm_config, prefix=prefix) + assert self.version == (2, 6) + + def init_llm( + self, + vllm_config: VllmConfig, + prefix: str = "", + ) -> nn.Module: + return Qwen2ForCausalLM(vllm_config=vllm_config, prefix=prefix) + + def init_vision_module( + self, + config: PretrainedConfig, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "", + ) -> nn.Module: + model = Idefics2VisionTransformer(config.vision_config, + quant_config=quant_config, + prefix=prefix) + if self.config.drop_vision_last_layer: + model.encoder.layers = model.encoder.layers[:-1] + return model + + def init_resampler(self, + embed_dim: int, + vision_dim: int, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "") -> nn.Module: + with set_default_torch_dtype(torch.float16): + # The resampler in 2.6 remains consistent with the one in 2.5. 
+ resampler = Resampler2_5(num_queries=self.config.query_num, + embed_dim=embed_dim, + num_heads=embed_dim // 128, + kv_dim=vision_dim, + quant_config=quant_config, + prefix=prefix) + + return resampler.to(device=current_platform.device_type, + dtype=torch.get_default_dtype()) + + def get_vision_hidden_states( + self, data: MiniCPMVImagePixelInputs) -> torch.Tensor: + pixel_values = data["pixel_values"] + tgt_sizes = data["tgt_sizes"] + + B = len(pixel_values) + P = pixel_values[0].shape[-2] + L = max(item.shape[-1] for item in pixel_values) + device = pixel_values[0].device + dtype = pixel_values[0].dtype + + all_pixel_values = torch.zeros((B, 3, P, L), + dtype=dtype, + device=device) + for i, pixel_values_item in enumerate(pixel_values): + L_item = pixel_values_item.shape[-1] + all_pixel_values[i, ..., :L_item] = pixel_values_item + + num_patches = tgt_sizes.prod(-1) + max_patches = num_patches.max().item() + assert isinstance(max_patches, int) + + patch_attn_mask = torch.zeros((B, max_patches), + dtype=torch.bool, + device=device) + for i, num_patches_item in enumerate(num_patches): + patch_attn_mask[i, :num_patches_item] = True + + vision_embedding = self.vpm( + all_pixel_values, + patch_attention_mask=patch_attn_mask.unsqueeze(1), + tgt_sizes=tgt_sizes, + ) + + return self.resampler(vision_embedding, tgt_sizes) + + def load_weights(self, weights: Iterable[tuple[str, + torch.Tensor]]) -> set[str]: + loader = AutoWeightsLoader(self, + skip_prefixes=["apm.", "audio", "tts"]) + return loader.load_weights(weights) + + +class MiniCPMV4_0(MiniCPMVBaseModel, SupportsLoRA): + packed_modules_mapping = { + "qkv_proj": [ + "q_proj", + "k_proj", + "v_proj", + ], + "gate_up_proj": [ + "gate_proj", + "up_proj", + ], + } + + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super().__init__(vllm_config=vllm_config, prefix=prefix) + assert self.version == (4, 0) + + def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig): + if 
isinstance(quant_config, (AWQConfig, AWQMarlinConfig)): + return None + return quant_config + + def init_llm( + self, + vllm_config: VllmConfig, + prefix: str = "", + ) -> nn.Module: + return LlamaForCausalLM(vllm_config=vllm_config, prefix=prefix) + + def init_vision_module( + self, + config: PretrainedConfig, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "", + ) -> nn.Module: + quant_config = self._maybe_ignore_quant_config(quant_config) + model = Idefics2VisionTransformer(config.vision_config, + quant_config=quant_config, + prefix=prefix) + if self.config.drop_vision_last_layer: + model.encoder.layers = model.encoder.layers[:-1] + return model + + def init_resampler( + self, + embed_dim: int, + vision_dim: int, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "", + ) -> nn.Module: + quant_config = self._maybe_ignore_quant_config(quant_config) + with set_default_torch_dtype(torch.float16): + # The resampler in 4.0 remains consistent with the one in 2.5/2.6. 
+ resampler = Resampler2_5(num_queries=self.config.query_num, + embed_dim=embed_dim, + num_heads=embed_dim // 128, + kv_dim=vision_dim, + quant_config=quant_config, + prefix=prefix) + + return resampler.to(device=current_platform.device_type, + dtype=torch.get_default_dtype()) + + def get_vision_hidden_states( + self, data: MiniCPMVImagePixelInputs) -> torch.Tensor: + pixel_values = data["pixel_values"] + tgt_sizes = data["tgt_sizes"] + + B = len(pixel_values) + P = pixel_values[0].shape[-2] + L = max(item.shape[-1] for item in pixel_values) + device = pixel_values[0].device + dtype = pixel_values[0].dtype + + all_pixel_values = torch.zeros((B, 3, P, L), + dtype=dtype, + device=device) + for i, pixel_values_item in enumerate(pixel_values): + L_item = pixel_values_item.shape[-1] + all_pixel_values[i, ..., :L_item] = pixel_values_item + + num_patches = tgt_sizes.prod(-1) + max_patches = num_patches.max().item() + assert isinstance(max_patches, int) + + patch_attn_mask = torch.zeros((B, max_patches), + dtype=torch.bool, + device=device) + for i, num_patches_item in enumerate(num_patches): + patch_attn_mask[i, :num_patches_item] = True + + vision_embedding = self.vpm( + all_pixel_values, + patch_attention_mask=patch_attn_mask.unsqueeze(1), + tgt_sizes=tgt_sizes, + ) + + return self.resampler(vision_embedding, tgt_sizes) + + def load_weights(self, weights: Iterable[tuple[str, + torch.Tensor]]) -> set[str]: + loader = AutoWeightsLoader(self, + skip_prefixes=["apm.", "audio", "tts"]) + return loader.load_weights(weights) + + +_SUPPORT_VERSION = { + (2, 0): MiniCPMV2_0, + (2, 5): MiniCPMV2_5, + (2, 6): MiniCPMV2_6, + (4, 0): MiniCPMV4_0, +} + + +@MULTIMODAL_REGISTRY.register_processor( + MiniCPMVMultiModalProcessor, + info=MiniCPMVProcessingInfo, + dummy_inputs=MiniCPMVDummyInputsBuilder) +class MiniCPMV(MiniCPMVBaseModel, SupportsMultiModal, SupportsLoRA): + """ + Different versions of MiniCPMV use different visual encoders and LLMs, + which is not conducive to the 
current integration logic of LoRA and + bitsandbytes in vLLM. Therefore, it is necessary to separate them. + """ + + def __new__(cls, *, vllm_config: VllmConfig, prefix: str = ""): + config = vllm_config.model_config.hf_config + if not hasattr(config, "version"): + if config.hidden_size == 2304 and config.query_num == 64: + version = (2, 0) + else: + version = (2, 5) + else: + version = str(config.version).split(".") + version = tuple([int(x) for x in version]) + # Dispatch class based on version + instance_cls = _SUPPORT_VERSION.get(version) + if instance_cls is None: + supported_versions = ", ".join( + [f"{v[0]}.{v[1]}" for v in sorted(_SUPPORT_VERSION.keys())]) + raise ValueError(f"Currently, MiniCPMV only supports versions " + f"{supported_versions}. Got version: {version}") + + # quant_config references base class members, + # so update values before init is called + cls.packed_modules_mapping.update(instance_cls.packed_modules_mapping) + cls.embedding_modules.update(instance_cls.embedding_modules) + cls.embedding_padding_modules += instance_cls.embedding_padding_modules + return instance_cls(vllm_config=vllm_config, prefix=prefix) diff --git a/vllm-deploy/multimodal_template.py b/vllm-deploy/multimodal_template.py new file mode 100644 index 0000000000000000000000000000000000000000..9bd717e057ed29739f1f57b46bd0c3091253110f --- /dev/null +++ b/vllm-deploy/multimodal_template.py @@ -0,0 +1,1884 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +""" +This example shows how to use vLLM for running offline inference with +the correct prompt format on vision language models for text generation. + +For most models, the prompt format should follow corresponding examples +on HuggingFace model repository. 
+""" + +import os +import random +from contextlib import contextmanager +from dataclasses import asdict +from typing import NamedTuple, Optional + +from huggingface_hub import snapshot_download +from transformers import AutoTokenizer + +from vllm import LLM, EngineArgs, SamplingParams +from vllm.assets.image import ImageAsset +from vllm.assets.video import VideoAsset +from vllm.lora.request import LoRARequest +from vllm.multimodal.image import convert_image_mode +from vllm.utils import FlexibleArgumentParser + + +class ModelRequestData(NamedTuple): + engine_args: EngineArgs + prompts: list[str] + stop_token_ids: Optional[list[int]] = None + lora_requests: Optional[list[LoRARequest]] = None + + +# NOTE: The default `max_num_seqs` and `max_model_len` may result in OOM on +# lower-end GPUs. +# Unless specified, these settings have been tested to work on a single L4. + + +# Aria +def run_aria(questions: list[str], modality: str) -> ModelRequestData: + assert modality == "image" + model_name = "rhymes-ai/Aria" + + # NOTE: Need L40 (or equivalent) to avoid OOM + engine_args = EngineArgs( + model=model_name, + max_model_len=4096, + max_num_seqs=2, + dtype="bfloat16", + limit_mm_per_prompt={modality: 1}, + ) + + prompts = [ + ( + f"<|im_start|>user\n<|img|>{question}" + "<|im_end|>\n<|im_start|>assistant\n" + ) + for question in questions + ] + + stop_token_ids = [93532, 93653, 944, 93421, 1019, 93653, 93519] + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + stop_token_ids=stop_token_ids, + ) + + +# Aya Vision +def run_aya_vision(questions: list[str], modality: str) -> ModelRequestData: + assert modality == "image" + model_name = "CohereForAI/aya-vision-8b" + + engine_args = EngineArgs( + model=model_name, + max_model_len=2048, + max_num_seqs=2, + mm_processor_kwargs={"crop_to_patches": True}, + limit_mm_per_prompt={modality: 1}, + ) + prompts = [ + 
f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|>{question}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" + for question in questions + ] + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + +# BLIP-2 +def run_blip2(questions: list[str], modality: str) -> ModelRequestData: + assert modality == "image" + + # BLIP-2 prompt format is inaccurate on HuggingFace model repository. + # See https://huggingface.co/Salesforce/blip2-opt-2.7b/discussions/15#64ff02f3f8cf9e4f5b038262 #noqa + prompts = [f"Question: {question} Answer:" for question in questions] + engine_args = EngineArgs( + model="Salesforce/blip2-opt-2.7b", + limit_mm_per_prompt={modality: 1}, + ) + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + +# Chameleon +def run_chameleon(questions: list[str], modality: str) -> ModelRequestData: + assert modality == "image" + + prompts = [f"{question}" for question in questions] + engine_args = EngineArgs( + model="facebook/chameleon-7b", + max_model_len=4096, + max_num_seqs=2, + limit_mm_per_prompt={modality: 1}, + ) + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + +def run_command_a_vision(questions: list[str], modality: str) -> ModelRequestData: + assert modality == "image" + + model_name = "CohereLabs/command-a-vision-07-2025" + + engine_args = EngineArgs( + model=model_name, + max_model_len=32768, + tensor_parallel_size=4, + limit_mm_per_prompt={modality: 1}, + ) + + prompts = [ + f"<|START_OF_TURN_TOKEN|><|USER_TOKEN|><|IMG_PATCH|>{question}<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" + for question in questions + ] + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + +# Deepseek-VL2 +def run_deepseek_vl2(questions: list[str], modality: str) -> ModelRequestData: + assert modality == "image" + + model_name = "deepseek-ai/deepseek-vl2-tiny" + + engine_args = EngineArgs( + model=model_name, + max_model_len=4096, + 
max_num_seqs=2, + hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]}, + limit_mm_per_prompt={modality: 1}, + ) + + prompts = [ + f"<|User|>: \n{question}\n\n<|Assistant|>:" for question in questions + ] + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + +# Florence2 +def run_florence2(questions: list[str], modality: str) -> ModelRequestData: + assert modality == "image" + + engine_args = EngineArgs( + model="microsoft/Florence-2-large", + tokenizer="Isotr0py/Florence-2-tokenizer", + max_model_len=4096, + max_num_seqs=2, + trust_remote_code=True, + dtype="bfloat16", + limit_mm_per_prompt={modality: 1}, + ) + + prompts = ["" for _ in questions] + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + +# Fuyu +def run_fuyu(questions: list[str], modality: str) -> ModelRequestData: + assert modality == "image" + + prompts = [f"{question}\n" for question in questions] + engine_args = EngineArgs( + model="adept/fuyu-8b", + max_model_len=2048, + max_num_seqs=2, + limit_mm_per_prompt={modality: 1}, + ) + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + +# Gemma 3 +def run_gemma3(questions: list[str], modality: str) -> ModelRequestData: + assert modality == "image" + model_name = "google/gemma-3-4b-it" + + engine_args = EngineArgs( + model=model_name, + max_model_len=2048, + max_num_seqs=2, + mm_processor_kwargs={"do_pan_and_scan": True}, + limit_mm_per_prompt={modality: 1}, + ) + + prompts = [ + ( + "user\n" + f"{question}\n" + "model\n" + ) + for question in questions + ] + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + +# Gemma3N +def run_gemma3n(questions: list[str], modality: str) -> ModelRequestData: + assert modality == "image" + model_name = "google/gemma-3n-E2B-it" + + engine_args = EngineArgs( + model=model_name, + max_model_len=2048, + max_num_seqs=2, + limit_mm_per_prompt={modality: 1}, + enforce_eager=True, + ) + + prompts = [ + ( + 
"user\n" + f"{question}\n" + "model\n" + ) + for question in questions + ] + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + +# GLM-4v +def run_glm4v(questions: list[str], modality: str) -> ModelRequestData: + assert modality == "image" + model_name = "zai-org/glm-4v-9b" + + engine_args = EngineArgs( + model=model_name, + max_model_len=2048, + max_num_seqs=2, + trust_remote_code=True, + enforce_eager=True, + hf_overrides={"architectures": ["GLM4VForCausalLM"]}, + limit_mm_per_prompt={modality: 1}, + ) + + prompts = [ + ( + "<|user|>\n<|begin_of_image|><|endoftext|><|end_of_image|>" + f"{question}<|assistant|>" + ) + for question in questions + ] + + stop_token_ids = [151329, 151336, 151338] + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + stop_token_ids=stop_token_ids, + ) + + +# GLM-4.1V +def run_glm4_1v(questions: list[str], modality: str) -> ModelRequestData: + model_name = "zai-org/GLM-4.1V-9B-Thinking" + + engine_args = EngineArgs( + model=model_name, + max_model_len=4096, + max_num_seqs=2, + mm_processor_kwargs={ + "size": {"shortest_edge": 12544, "longest_edge": 47040000}, + "fps": 1, + }, + limit_mm_per_prompt={modality: 1}, + enforce_eager=True, + ) + + if modality == "image": + placeholder = "<|begin_of_image|><|image|><|end_of_image|>" + elif modality == "video": + placeholder = "<|begin_of_video|><|video|><|end_of_video|>" + + prompts = [ + ( + "[gMASK]<|system|>\nYou are a helpful assistant.<|user|>\n" + f"{placeholder}" + f"{question}<|assistant|>assistant\n" + ) + for question in questions + ] + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + +# GLM-4.5V +def run_glm4_5v(questions: list[str], modality: str) -> ModelRequestData: + model_name = "zai-org/GLM-4.5V" + + engine_args = EngineArgs( + model=model_name, + max_model_len=4096, + max_num_seqs=2, + mm_processor_kwargs={ + "size": {"shortest_edge": 12544, "longest_edge": 47040000}, + "fps": 1, + }, + 
limit_mm_per_prompt={modality: 1}, + enforce_eager=True, + tensor_parallel_size=4, + ) + + if modality == "image": + placeholder = "<|begin_of_image|><|image|><|end_of_image|>" + elif modality == "video": + placeholder = "<|begin_of_video|><|video|><|end_of_video|>" + + prompts = [ + ( + "[gMASK]<|system|>\nYou are a helpful assistant.<|user|>\n" + f"{placeholder}" + f"{question}<|assistant|>assistant\n" + ) + for question in questions + ] + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + +# GLM-4.5V-FP8 +def run_glm4_5v_fp8(questions: list[str], modality: str) -> ModelRequestData: + model_name = "zai-org/GLM-4.5V-FP8" + + engine_args = EngineArgs( + model=model_name, + max_model_len=4096, + max_num_seqs=2, + mm_processor_kwargs={ + "size": {"shortest_edge": 12544, "longest_edge": 47040000}, + "fps": 1, + }, + limit_mm_per_prompt={modality: 1}, + enforce_eager=True, + tensor_parallel_size=4, + ) + + if modality == "image": + placeholder = "<|begin_of_image|><|image|><|end_of_image|>" + elif modality == "video": + placeholder = "<|begin_of_video|><|video|><|end_of_video|>" + + prompts = [ + ( + "[gMASK]<|system|>\nYou are a helpful assistant.<|user|>\n" + f"{placeholder}" + f"{question}<|assistant|>assistant\n" + ) + for question in questions + ] + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + +# H2OVL-Mississippi +def run_h2ovl(questions: list[str], modality: str) -> ModelRequestData: + assert modality == "image" + + model_name = "h2oai/h2ovl-mississippi-800m" + + engine_args = EngineArgs( + model=model_name, + trust_remote_code=True, + max_model_len=8192, + limit_mm_per_prompt={modality: 1}, + ) + + tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) + messages = [ + [{"role": "user", "content": f"\n{question}"}] for question in questions + ] + prompts = tokenizer.apply_chat_template( + messages, tokenize=False, add_generation_prompt=True + ) + + # Stop tokens for 
H2OVL-Mississippi + # https://huggingface.co/h2oai/h2ovl-mississippi-800m + stop_token_ids = [tokenizer.eos_token_id] + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + stop_token_ids=stop_token_ids, + ) + + +# naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B +def run_hyperclovax_seed_vision( + questions: list[str], modality: str +) -> ModelRequestData: + model_name = "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B" + tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) + + engine_args = EngineArgs( + model=model_name, + trust_remote_code=True, + max_model_len=8192 if modality == "image" else 16384, + limit_mm_per_prompt={modality: 1}, + ) + + messages = list() + for question in questions: + if modality == "image": + """ + ocr: List the words in the image in raster order. + Even if the word order feels unnatural for reading, + the model will handle it as long as it follows raster order. + e.g. "Naver, CLOVA, bigshane" + lens_keywords: List the entity names in the image. + e.g. "iPhone" + lens_local_keywords: List the entity names with quads in the image. + e.g. 
"[0.07, 0.21, 0.92, 0.90] iPhone" + """ + messages.append( + [ + { + "role": "user", + "content": [ + { + "type": "image", + "ocr": "", + "lens_keywords": "", + "lens_local_keywords": "", + }, + { + "type": "text", + "text": question, + }, + ], + } + ] + ) + elif modality == "video": + messages.append( + [ + { + "role": "user", + "content": [ + { + "type": "video", + }, + { + "type": "text", + "text": question, + }, + ], + } + ] + ) + else: + raise ValueError(f"Unsupported modality: {modality}") + + prompts = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True, + ) + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + stop_token_ids=None, + ) + + +# Idefics3-8B-Llama3 +def run_idefics3(questions: list[str], modality: str) -> ModelRequestData: + assert modality == "image" + model_name = "HuggingFaceM4/Idefics3-8B-Llama3" + + engine_args = EngineArgs( + model=model_name, + max_model_len=8192, + max_num_seqs=2, + enforce_eager=True, + # if you are running out of memory, you can reduce the "longest_edge". + # see: https://huggingface.co/HuggingFaceM4/Idefics3-8B-Llama3#model-optimizations + mm_processor_kwargs={ + "size": {"longest_edge": 3 * 364}, + }, + limit_mm_per_prompt={modality: 1}, + ) + prompts = [ + (f"<|begin_of_text|>User:{question}\nAssistant:") + for question in questions + ] + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + +# Intern-S1 +def run_interns1(questions: list[str], modality: str) -> ModelRequestData: + model_name = "internlm/Intern-S1" + + engine_args = EngineArgs( + model=model_name, + trust_remote_code=True, + max_model_len=8192, + max_num_seqs=2, + limit_mm_per_prompt={modality: 1}, + enforce_eager=True, + ) + + if modality == "image": + placeholder = "" + elif modality == "video": + placeholder = "