buildborderless
/

CommunityForensics-DeepfakeDet-ViT

+import os
+import random
+import cv2
+from datetime import datetime
+import logging
# Route INFO-and-above log records to a file, each line stamped with the
# time and the severity level.
log_file = "sample_images.log"
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename=log_file,
)
# Lazily-initialised Haar cascade shared by every detect_faces() call, so the
# cascade XML is parsed once per process instead of once per image.
_FACE_CASCADE = None


def _get_face_cascade():
    """Return the frontal-face Haar cascade, loading it on first use only."""
    global _FACE_CASCADE
    if _FACE_CASCADE is None:
        _FACE_CASCADE = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
    return _FACE_CASCADE


def detect_faces(image_path):
    """Report whether at least one frontal face is detected in an image file.

    The image is read as grayscale and scanned with OpenCV's
    ``haarcascade_frontalface_default.xml`` cascade.

    Args:
        image_path: Path of the image file to inspect.

    Returns:
        True if the cascade finds one or more faces. False if it finds none,
        or if ``cv2.imread`` could not read the file (it returned ``None``).
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # Unreadable or unsupported file: treat it as "no face" rather than fail.
        return False

    faces = _get_face_cascade().detectMultiScale(
        image, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
    )
    return len(faces) > 0
def sample_images(input_folder, output_folder, sample_rate=0.2):
    """Mirror a folder tree, hard-linking every image in which a face is found.

    Walks ``input_folder`` recursively, recreates each sub-directory under
    ``output_folder``, and hard-links into it every file that has an image
    extension and for which ``detect_faces`` returns True. Each link is
    written to the log, per-directory progress is printed, and overall
    totals are logged at the end.

    Args:
        input_folder: Root directory to scan.
        output_folder: Root of the mirrored tree; created if missing.
        sample_rate: Currently unused; kept so existing callers keep working.
            NOTE(review): no random sampling is performed -- confirm whether
            it was meant to be.

    Returns:
        None.

    Raises:
        OSError: Propagated from ``os.makedirs`` or ``os.link``. A destination
            that already exists is not an error: it is skipped and logged, so
            the function can be re-run on the same output folder.
    """
    os.makedirs(output_folder, exist_ok=True)

    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')
    total_files = 0
    sampled_files = 0
    start_time = datetime.now()

    for root, _dirs, files in os.walk(input_folder):
        relative_path = os.path.relpath(root, input_folder)
        output_subfolder = os.path.join(output_folder, relative_path)
        os.makedirs(output_subfolder, exist_ok=True)

        # The total counts every file seen, not only images.
        total_files += len(files)

        # Run detection for the whole directory before linking anything.
        selected = [
            name for name in files
            if name.lower().endswith(image_extensions)
            and detect_faces(os.path.join(root, name))
        ]
        sampled_files += len(selected)

        for name in selected:
            input_file_path = os.path.join(root, name)
            output_file_path = os.path.join(output_subfolder, name)
            try:
                os.link(input_file_path, output_file_path)
            except FileExistsError:
                # Left over from an earlier run: keep it and carry on.
                logging.info(
                    f"Skipped {input_file_path}: {output_file_path} already exists"
                )
                continue
            # Log the action
            logging.info(f"Sampled and copied {input_file_path} to {output_file_path}")

        elapsed_time = datetime.now() - start_time
        print(f"Processed {sampled_files}/{total_files} files in {elapsed_time}")

    total_time = datetime.now() - start_time
    logging.info(f"Total time taken: {total_time}")
    logging.info(f"Sampled {sampled_files} out of {total_files} files.")
if __name__ == "__main__":
    # Script entry point: filter "EvalSet" into "resampledEvalSet" using the
    # default sample_rate.
    input_folder, output_folder = "EvalSet", "resampledEvalSet"
    sample_images(input_folder, output_folder)