rushabh14
/

sam-vit-base-with-handler

@@ -1,41 +1,75 @@
 # handler.py
 import io
 import numpy as np
 from PIL import Image
 from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
-from huggingface_inference_toolkit.handler import BaseHandler
-class EndpointHandler(BaseHandler):
-    # Pre-change handler (removed side of this diff): builds a SAM "vit_b"
-    # automatic mask generator once and answers each request with one combined mask.
-    def __init__(self, model_dir):
         """
         Called once at startup.
-        The model files are mounted under /mnt/models by the Inference Endpoint.
         """
-        super().__init__(model_dir)
-        # NOTE(review): the checkpoint path is hard-coded; model_dir is only
-        # forwarded to the base class and is not used to locate the file.
-        checkpoint = "/mnt/models/pytorch_model.bin"
         sam = sam_model_registry["vit_b"](checkpoint=checkpoint)
         self.mask_generator = SamAutomaticMaskGenerator(sam)
-    async def __call__(self, request):
         """
         Called on every HTTP request.
-        Expecting multipart/form-data with an 'image' field.
         """
-        form = await request.form()
-        # Read the uploaded file's bytes from the 'image' form field.
-        image_bytes = form["image"].file.read()
         img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
         img_np = np.array(img)
         masks = self.mask_generator.generate(img_np)
-        # Start from an all-zero 2-D uint8 canvas and set to 255 every pixel
-        # selected by each mask's "segmentation" entry (union of all masks).
         combined = np.zeros(img_np.shape[:2], dtype=np.uint8)
         for m in masks:
             combined[m["segmentation"]] = 255
         out = io.BytesIO()
         Image.fromarray(combined).save(out, format="PNG")
         out.seek(0)
-        # NOTE(review): this returns raw PNG bytes under "mask_png"; whether the
-        # serving toolkit base64-encodes bytes in the response is not shown here — confirm.
-        return {"mask_png": out.getvalue()}

 # handler.py
 import io
+import base64
 import numpy as np
 from PIL import Image
 from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
+from typing import Dict, List, Any
+class EndpointHandler():
+    """
+    Inference handler: runs SAM ("vit_b") automatic mask generation on one
+    image per request and returns the union of all masks as a base64 PNG.
+    """
+
+    def __init__(self, path=""):
         """
         Called once at startup.
+        Looks for the checkpoint under `path` first, then under /opt/ml/model.
+
+        Args:
+            path: Directory expected to contain the model checkpoint.
+
+        Raises:
+            FileNotFoundError: If none of the candidate checkpoint files exists.
         """
+        # Try different possible checkpoint paths
+        import os
+        possible_paths = [
+            os.path.join(path, "pytorch_model.bin"),
+            os.path.join(path, "model.safetensors"),
+            "/opt/ml/model/pytorch_model.bin",
+            "/opt/ml/model/model.safetensors"
+        ]
+        # NOTE(review): the .safetensors candidates are kept for backward
+        # compatibility; presumably the segment_anything builder reads the
+        # checkpoint with torch.load — confirm it can actually load them.
+        checkpoint = next((p for p in possible_paths if os.path.exists(p)), None)
+        if checkpoint is None:
+            # List what was searched so a misconfigured mount is easy to spot.
+            raise FileNotFoundError(
+                "Could not find model checkpoint in any of the expected locations: "
+                + ", ".join(possible_paths)
+            )
         sam = sam_model_registry["vit_b"](checkpoint=checkpoint)
         self.mask_generator = SamAutomaticMaskGenerator(sam)
+
+    @staticmethod
+    def _decode_base64_image(payload: Any) -> bytes:
+        """Decode a base64 image payload, tolerating a data-URI prefix and whitespace.
+
+        Raises:
+            ValueError: If the payload is not text, is not valid base64
+                (binascii.Error is a ValueError subclass), or decodes to nothing.
+        """
+        if isinstance(payload, (bytes, bytearray)):
+            # Non-ASCII bytes become U+FFFD, which strict decoding rejects below.
+            text = bytes(payload).decode("ascii", errors="replace")
+        elif isinstance(payload, str):
+            text = payload
+        else:
+            raise ValueError("Invalid input format. Expected base64 encoded image string.")
+        if text.lstrip().startswith("data:"):
+            # Clients often send "data:image/png;base64,<payload>"; keep only the payload.
+            _, sep, text = text.partition(",")
+            if not sep:
+                raise ValueError("Invalid input format. Expected base64 encoded image string.")
+        # Line-wrapped base64 is legitimate, but validate=True rejects whitespace.
+        text = "".join(text.split())
+        # validate=True: fail here instead of silently decoding garbage that
+        # would only surface later as an unreadable-image error.
+        image_bytes = base64.b64decode(text, validate=True)
+        if not image_bytes:
+            raise ValueError("Invalid input format. Expected base64 encoded image string.")
+        return image_bytes
+
+    @staticmethod
+    def _extract_image_bytes(data: Any) -> bytes:
+        """Return the raw image bytes carried by a request payload.
+
+        Accepted shapes: {"inputs": <b64>}, {"inputs": {"image": <b64>}},
+        {"image": <b64>}.
+
+        Raises:
+            ValueError: If no image field is present or it cannot be decoded.
+        """
+        if isinstance(data, dict) and "inputs" in data:
+            inputs = data["inputs"]
+            if isinstance(inputs, str):
+                # Base64 encoded image
+                return EndpointHandler._decode_base64_image(inputs)
+            if isinstance(inputs, dict) and "image" in inputs:
+                # Nested structure with image field
+                return EndpointHandler._decode_base64_image(inputs["image"])
+            raise ValueError("Invalid input format. Expected base64 encoded image string.")
+        if isinstance(data, dict) and "image" in data:
+            # Direct image field
+            return EndpointHandler._decode_base64_image(data["image"])
+        raise ValueError("No image found in request. Expected 'inputs' or 'image' field with base64 encoded image.")
+
+    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
         Called on every HTTP request.
+        Expecting base64 encoded image in the 'inputs' field or 'image' field.
+        An already-decoded PIL image in 'inputs' is also accepted.
+
+        Returns:
+            A one-element list: {"mask_png_base64": <base64 PNG of the union
+            of all masks>, "num_masks": <number of masks generated>}.
+
+        Raises:
+            ValueError: If the request carries no usable image.
         """
+        inputs = data.get("inputs") if isinstance(data, dict) else None
+        if isinstance(inputs, Image.Image):
+            # NOTE(review): assumes the serving toolkit may deserialize image/*
+            # request bodies into a PIL image under 'inputs' — confirm.
+            img = inputs.convert("RGB")
+        else:
+            image_bytes = self._extract_image_bytes(data)
+            img = Image.open(io.BytesIO(image_bytes)).convert("RGB")
         img_np = np.array(img)
+        # Generate masks
         masks = self.mask_generator.generate(img_np)
+        # Union of all masks: 255 wherever any mask's "segmentation" selects a pixel.
         combined = np.zeros(img_np.shape[:2], dtype=np.uint8)
         for m in masks:
             combined[m["segmentation"]] = 255
+        # Convert result to base64
         out = io.BytesIO()
         Image.fromarray(combined).save(out, format="PNG")
         out.seek(0)
+        mask_base64 = base64.b64encode(out.getvalue()).decode('utf-8')
+        # Return in the expected format
+        return [{"mask_png_base64": mask_base64, "num_masks": len(masks)}]