ubden committed on
Commit b774ce8 · verified · 1 Parent(s): fb9ee41

Update handler.py

Files changed (1)
  1. handler.py +92 -441
handler.py CHANGED
@@ -10,211 +10,85 @@ import base64
  from io import BytesIO
  from PIL import Image
  import requests
- import time
-
- # Import utilities if available
- try:
-     from utils import (
-         performance_monitor,
-         validate_image_input,
-         sanitize_parameters,
-         get_system_info,
-         create_health_check,
-         deepseek_client
-     )
-     UTILS_AVAILABLE = True
- except ImportError:
-     UTILS_AVAILABLE = False
-     deepseek_client = None
-     print("⚠️ Utils module not found - performance monitoring and DeepSeek integration disabled")


  class EndpointHandler:
      def __init__(self, path=""):
          """
          Hey there! Let's get this PULSE-7B model up and running.
-         We'll try to load from local files first, then fallback to HuggingFace hub.

          Args:
-             path: Model directory path (defaults to current directory)
          """
          print("🚀 Starting up PULSE-7B handler...")
          print("📝 Enhanced by Ubden® Team - github.com/ck-cankurt")
-         import sys
-         print(f"🔧 Python version: {sys.version}")
-         print(f"🔧 PyTorch version: {torch.__version__}")
-
-         # Check transformers version
-         try:
-             import transformers
-             print(f"🔧 Transformers version: {transformers.__version__}")
-
-             # PULSE LLaVA works with transformers==4.37.2
-             if transformers.__version__ == "4.37.2":
-                 print("✅ Using PULSE LLaVA compatible version (4.37.2)")
-             elif "dev" in transformers.__version__ or "git" in str(transformers.__version__):
-                 print("⚠️ Using development version - may conflict with PULSE LLaVA")
-             else:
-                 print("⚠️ Using different version - PULSE LLaVA prefers 4.37.2")
-         except Exception as e:
-             print(f"❌ Error checking transformers version: {e}")
-
-         print(f"🔧 CUDA available: {torch.cuda.is_available()}")
-         if torch.cuda.is_available():
-             print(f"🔧 CUDA device: {torch.cuda.get_device_name(0)}")

          # Let's see what hardware we're working with
          self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
          print(f"🖥️ Running on: {self.device}")

-         # Set model path - use local files if available
-         self.model_path = path if path else "."
-         print(f"📁 Model path: {self.model_path}")
-
-         # Check if we have local model files
-         import os
-         local_files = {
-             'config': os.path.exists(os.path.join(self.model_path, 'config.json')),
-             'tokenizer_config': os.path.exists(os.path.join(self.model_path, 'tokenizer_config.json')),
-             'tokenizer_model': os.path.exists(os.path.join(self.model_path, 'tokenizer.model')),
-             'model_index': os.path.exists(os.path.join(self.model_path, 'model.safetensors.index.json')),
-             'generation_config': os.path.exists(os.path.join(self.model_path, 'generation_config.json'))
-         }
-
-         local_available = all(local_files.values())
-         print(f"📦 Local model files: {'✅ Available' if local_available else '❌ Missing'}")
-         for file_type, exists in local_files.items():
-             print(f"   - {file_type}: {'✅' if exists else '❌'}")
-
-         # DEFINITIVE FIX: use local files if they exist, otherwise the HuggingFace Hub
          try:
-             print("📦 DEFINITIVE FIX: loading the model's own architecture files...")
-
-             # First download and import the model's custom files
-             from transformers import AutoConfig, AutoTokenizer
-             from transformers.utils import cached_file
-             import importlib.util
-             import sys
-             import os
-
-             # Load the model config (local if available, otherwise the hub)
-             model_source = self.model_path if local_available else "PULSE-ECG/PULSE-7B"
-             config = AutoConfig.from_pretrained(model_source, trust_remote_code=True)
-             print(f"🔧 Model config loaded: {config.model_type} (source: {'local' if local_available else 'hub'})")
-
-             # Download or locate the custom modeling file
-             try:
-                 if local_available:
-                     # Look for a local modeling file
-                     modeling_file = os.path.join(self.model_path, "modeling_llava.py")
-                     if not os.path.exists(modeling_file):
-                         # Not found locally, download from the hub
-                         modeling_file = cached_file("PULSE-ECG/PULSE-7B", "modeling_llava.py", _raise_exceptions_for_missing_entries=False)
-                 else:
-                     # Download from the hub
-                     modeling_file = cached_file("PULSE-ECG/PULSE-7B", "modeling_llava.py", _raise_exceptions_for_missing_entries=False)
-                 if modeling_file and os.path.exists(modeling_file):
-                     print(f"🔧 Custom modeling file found: {modeling_file}")
-
-                     # Load the file as a module
-                     spec = importlib.util.spec_from_file_location("modeling_llava", modeling_file)
-                     modeling_module = importlib.util.module_from_spec(spec)
-                     sys.modules["modeling_llava"] = modeling_module
-                     spec.loader.exec_module(modeling_module)
-
-                     print("🔧 Custom modeling module loaded")
-
-                     # Find and use the model class
-                     if hasattr(modeling_module, 'LlavaLlamaForCausalLM'):
-                         print("🎯 LlavaLlamaForCausalLM found, loading...")
-
-                         self.tokenizer = AutoTokenizer.from_pretrained(model_source, trust_remote_code=True)
-                         self.model = modeling_module.LlavaLlamaForCausalLM.from_pretrained(
-                             model_source,
-                             config=config,
-                             torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                             device_map="auto",
-                             low_cpu_mem_usage=True,
-                             trust_remote_code=True
-                         )
-
-                         if self.tokenizer.pad_token is None:
-                             self.tokenizer.pad_token = self.tokenizer.eos_token
-                             self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
-
-                         self.model.eval()
-                         self.use_pipeline = False
-                         self.pipe = None
-                         self.processor = None
-                         print("✅ PULSE-7B loaded successfully with the custom implementation!")
-
-                     else:
-                         raise Exception("LlavaLlamaForCausalLM class not found")
-                 else:
-                     raise Exception("modeling_llava.py file not found")
-
-             except Exception as modeling_error:
-                 print(f"⚠️ Custom modeling could not be loaded: {modeling_error}")
-                 raise modeling_error

-         except Exception as e_final:
-             print(f"😓 The custom approach failed as well: {e_final}")
-             print("🔄 Switching to the simplest solution...")

-             # Simplest solution: just a text-generation pipeline
              try:
-                 from transformers import pipeline, AutoTokenizer
-
-                 print("📦 SIMPLEST SOLUTION: tokenizer + basic generation only...")
-
-                 # Load only the tokenizer (local if available)
-                 tokenizer_source = self.model_path if local_available else "PULSE-ECG/PULSE-7B"
-                 self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_source, trust_remote_code=True)
-                 print(f"🔧 Tokenizer loaded (source: {'local' if local_available else 'hub'})")

-                 if self.tokenizer.pad_token is None:
-                     self.tokenizer.pad_token = self.tokenizer.eos_token
-                     self.tokenizer.pad_token_id = self.tokenizer.eos_token_id

-                 # Set up the pipeline for text generation
-                 pipeline_source = self.model_path if local_available else "PULSE-ECG/PULSE-7B"
-                 self.pipe = pipeline(
-                     "text-generation",
-                     tokenizer=self.tokenizer,
-                     model=pipeline_source,
                      torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                     device=0 if torch.cuda.is_available() else -1,
                      trust_remote_code=True
                  )
-                 print(f"🔧 Pipeline set up (source: {'local' if local_available else 'hub'})")

-                 self.use_pipeline = True
-                 self.model = None
-                 self.processor = None
-                 print("✅ SIMPLE SOLUTION SUCCESSFUL: tokenizer + pipeline loaded!")

-             except Exception as e_simple:
-                 print(f"💥 Even the simplest solution failed: {e_simple}")
-                 print("   The model could not be loaded at all")

                  self.model = None
-                 self.processor = None
                  self.tokenizer = None
-                 self.pipe = None
                  self.use_pipeline = None
-
-         # Final status report
-         print("\n🔍 Model Loading Status Report:")
-         print(f"   - use_pipeline: {self.use_pipeline}")
-         print(f"   - model: {'✅ Loaded' if self.model is not None else '❌ None'}")
-         print(f"   - processor: {'✅ Loaded' if self.processor is not None else '❌ None'}")
-         print(f"   - tokenizer: {'✅ Loaded' if self.tokenizer is not None else '❌ None'}")
-         print(f"   - pipe: {'✅ Loaded' if self.pipe is not None else '❌ None'}")
-
-         if all(x is None for x in [self.model, self.processor, self.tokenizer, self.pipe]):
-             print("💥 CRITICAL: No model components loaded successfully!")
          else:
-             print("✅ At least one model component loaded successfully")

      def process_image_input(self, image_input):
          """
@@ -226,127 +100,34 @@ class EndpointHandler:
          Returns:
              PIL Image object or None if something goes wrong
          """
-         if not image_input or not isinstance(image_input, str):
-             print("❌ Invalid image input provided")
-             return None
-
          try:
              # Check if it's a URL (starts with http/https)
-             if image_input.startswith(('http://', 'https://')):
                  print(f"🌐 Fetching image from URL: {image_input[:50]}...")
-                 headers = {
-                     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
-                 }
-                 response = requests.get(image_input, timeout=15, headers=headers)
                  response.raise_for_status()
-
-                 # Verify it's actually an image
-                 if not response.headers.get('content-type', '').startswith('image/'):
-                     print(f"⚠️ URL doesn't seem to point to an image: {response.headers.get('content-type')}")
-
                  image = Image.open(BytesIO(response.content)).convert('RGB')
-                 print(f"✅ Image downloaded successfully! Size: {image.size}")
                  return image

-             # Handle base64 images
-             else:
-                 print("🔍 Processing base64 image...")
-                 base64_data = image_input
-
-                 # Remove data URL prefix if it exists (data:image/jpeg;base64,...)
-                 if image_input.startswith('data:'):
-                     if 'base64,' in image_input:
-                         base64_data = image_input.split('base64,')[1]
-                     else:
-                         print("❌ Invalid data URL format - missing base64 encoding")
-                         return None
-
-                 # Clean up any whitespace
-                 base64_data = base64_data.strip().replace('\n', '').replace('\r', '').replace(' ', '')
-
-                 # Validate base64 format
-                 try:
-                     # Add padding if necessary
-                     missing_padding = len(base64_data) % 4
-                     if missing_padding:
-                         base64_data += '=' * (4 - missing_padding)
-
-                     image_data = base64.b64decode(base64_data, validate=True)
-                 except Exception as decode_error:
-                     print(f"❌ Invalid base64 encoding: {decode_error}")
-                     return None
-
-                 # Verify it's a valid image
-                 if len(image_data) < 100:  # Too small to be a real image
-                     print("❌ Decoded data too small to be a valid image")
-                     return None

                  image = Image.open(BytesIO(image_data)).convert('RGB')
-                 print(f"✅ Base64 image decoded successfully! Size: {image.size}")
                  return image

-         except requests.exceptions.Timeout:
-             print("❌ Request timeout - image URL took too long to respond")
-             return None
-         except requests.exceptions.RequestException as e:
-             print(f"❌ Network error while fetching image: {e}")
-             return None
          except Exception as e:
-             print(f"❌ Error processing image: {e}")
              return None

          return None

-     def add_turkish_commentary(self, response: Dict[str, Any], enable_commentary: bool, timeout: int = 30) -> Dict[str, Any]:
-         """Add Turkish commentary to the response using DeepSeek API"""
-         if not enable_commentary:
-             return response
-
-         if not UTILS_AVAILABLE or not deepseek_client:
-             print("⚠️ DeepSeek client not available - skipping Turkish commentary")
-             response["commentary_status"] = "unavailable"
-             return response
-
-         if not deepseek_client.is_available():
-             print("⚠️ DeepSeek API key not configured - skipping Turkish commentary")
-             response["commentary_status"] = "api_key_missing"
-             return response
-
-         generated_text = response.get("generated_text", "")
-         if not generated_text:
-             print("⚠️ No generated text to comment on")
-             response["commentary_status"] = "no_text"
-             return response
-
-         print("🔄 Adding Turkish commentary via DeepSeek...")
-         commentary_result = deepseek_client.get_turkish_commentary(generated_text, timeout)
-
-         if commentary_result["success"]:
-             response["comment_text"] = commentary_result["comment_text"]
-             response["commentary_model"] = commentary_result.get("model", "deepseek-chat")
-             response["commentary_tokens"] = commentary_result.get("tokens_used", 0)
-             response["commentary_status"] = "success"
-             print("✅ Turkish commentary added successfully")
-         else:
-             response["comment_text"] = ""
-             response["commentary_error"] = commentary_result["error"]
-             response["commentary_status"] = "failed"
-             print(f"❌ Turkish commentary could not be added: {commentary_result['error']}")
-
-         return response
-
-     def health_check(self) -> Dict[str, Any]:
-         """Health check endpoint"""
-         if UTILS_AVAILABLE:
-             return create_health_check()
-         else:
-             return {
-                 'status': 'healthy',
-                 'model': 'PULSE-7B',
-                 'timestamp': time.time(),
-                 'handler_version': '2.0.0'
-             }
-
      def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
          """
          Main processing function - where the magic happens!
@@ -358,19 +139,13 @@ class EndpointHandler:
              List with the generated response
          """
          # Quick check - is our model ready?
-         if (self.use_pipeline is None and self.model is None and self.pipe is None):
              return [{
                  "generated_text": "Oops! Model couldn't load properly. Please check the deployment settings.",
-                 "error": "Model initialization failed - all loading approaches failed",
                  "handler": "Ubden® Team Enhanced Handler"
              }]

-         # Performance monitoring
-         start_time = time.time()
-         request_type = "text_only"
-         success = False
-         image_processing_time = 0.0
-
          try:
              # Parse the inputs - flexible format support
              inputs = data.get("inputs", "")
@@ -379,31 +154,15 @@

              if isinstance(inputs, dict):
                  # Dictionary input - check for text and image
-                 # Support multiple text field names: query, text, prompt
-                 text = inputs.get("query", inputs.get("text", inputs.get("prompt", "")))

                  # Check for image in various formats
                  image_input = inputs.get("image", inputs.get("image_url", inputs.get("image_base64", None)))
                  if image_input:
-                     # Determine request type and validate input
-                     if UTILS_AVAILABLE:
-                         validation = validate_image_input(image_input)
-                         request_type = validation.get('type', 'unknown')
-                         if request_type == 'url':
-                             request_type = 'image_url'
-                     else:
-                         request_type = 'image_url' if image_input.startswith(('http://', 'https://')) else 'base64'
-
-                     # Process image with timing
-                     image_start = time.time()
                      image = self.process_image_input(image_input)
-                     image_processing_time = time.time() - image_start
-
                      if image:
-                         print(f"✅ Image processed successfully: {image.size[0]}x{image.size[1]} pixels")
-                         # For now, add image description to text (text-only mode)
-                         text = f"[ECG Image Analysis Request - Image Size: {image.size[0]}x{image.size[1]} pixels] {text}"
-                         print(f"🔄 Running in text-only mode with image context")
              else:
                  # Simple string input
                  text = str(inputs)
@@ -413,105 +172,38 @@

              # Get generation parameters with sensible defaults
              parameters = data.get("parameters", {})
-
-             # Check if Turkish commentary is requested
-             enable_turkish_commentary = parameters.get("enable_turkish_commentary", False)  # Default false
-             deepseek_timeout = parameters.get("deepseek_timeout", 30)
-
-             # Use utils for parameter sanitization if available
-             if UTILS_AVAILABLE:
-                 sanitized_params = sanitize_parameters(parameters)
-                 max_new_tokens = sanitized_params["max_new_tokens"]
-                 temperature = sanitized_params["temperature"]
-                 top_p = sanitized_params["top_p"]
-                 repetition_penalty = sanitized_params["repetition_penalty"]
-                 stop_sequences = sanitized_params["stop"]
-                 return_full_text = sanitized_params["return_full_text"]
-                 do_sample = sanitized_params["do_sample"]
-             else:
-                 max_new_tokens = min(parameters.get("max_new_tokens", 512), 2048)
-                 temperature = max(0.01, min(parameters.get("temperature", 0.2), 2.0))
-                 top_p = max(0.01, min(parameters.get("top_p", 0.9), 1.0))
-                 do_sample = parameters.get("do_sample", temperature > 0.01)
-                 repetition_penalty = max(1.0, min(parameters.get("repetition_penalty", 1.05), 2.0))
-                 stop_sequences = parameters.get("stop", ["</s>"])
-                 return_full_text = parameters.get("return_full_text", False)
-
-             print(f"🎛️ Generation params: max_tokens={max_new_tokens}, temp={temperature}, top_p={top_p}, rep_penalty={repetition_penalty}")

              # Using pipeline? Let's go!
-             if self.use_pipeline and self.pipe is not None:
-                 generation_kwargs = {
-                     "max_new_tokens": max_new_tokens,
-                     "temperature": temperature,
-                     "top_p": top_p,
-                     "do_sample": do_sample,
-                     "repetition_penalty": repetition_penalty,
-                     "return_full_text": return_full_text
-                 }
-
-                 # Add stop sequences if supported
-                 if stop_sequences and stop_sequences != ["</s>"]:
-                     generation_kwargs["stop_sequence"] = stop_sequences[0]  # Most pipelines support a single stop
-
-                 result = self.pipe(text, **generation_kwargs)

-                 # Pipeline returns a list, let's handle it properly
                  if isinstance(result, list) and len(result) > 0:
-                     generated_text = result[0].get("generated_text", "")
-                     # Clean up any stop sequences that might remain
-                     for stop_seq in stop_sequences:
-                         if generated_text.endswith(stop_seq):
-                             generated_text = generated_text[:-len(stop_seq)].rstrip()
-
-                     success = True
-                     result = {
-                         "generated_text": generated_text,
-                         "model": "PULSE-7B",
-                         "processing_method": "pipeline"
-                     }
-
-                     # Add Turkish commentary if requested
-                     result = self.add_turkish_commentary(result, enable_turkish_commentary, deepseek_timeout)
-
-                     # Log performance metrics
-                     if UTILS_AVAILABLE:
-                         generation_time = time.time() - start_time
-                         performance_monitor.log_request(
-                             request_type, success, generation_time, image_processing_time
-                         )
-
-                     return [result]
                  else:
-                     success = True
-                     result_dict = {
-                         "generated_text": str(result),
-                         "model": "PULSE-7B",
-                         "processing_method": "pipeline"
-                     }
-
-                     # Add Turkish commentary if requested
-                     result_dict = self.add_turkish_commentary(result_dict, enable_turkish_commentary, deepseek_timeout)
-
-                     # Log performance metrics
-                     if UTILS_AVAILABLE:
-                         generation_time = time.time() - start_time
-                         performance_monitor.log_request(
-                             request_type, success, generation_time, image_processing_time
-                         )
-
-                     return [result_dict]

-             # Manual generation mode (text-only with tokenizer)
-             elif self.model is not None and self.tokenizer is not None:
-                 print(f"🔥 Using manual generation with tokenizer: '{text[:50]}...'")
-
-                 # Simple tokenizer-based generation
                  encoded = self.tokenizer(
                      text,
                      return_tensors="pt",
                      truncation=True,
-                     max_length=4096
                  )

                  input_ids = encoded["input_ids"].to(self.device)
@@ -519,9 +211,10 @@ class EndpointHandler:
              if attention_mask is not None:
                  attention_mask = attention_mask.to(self.device)

              with torch.no_grad():
                  outputs = self.model.generate(
-                     input_ids=input_ids,
                      attention_mask=attention_mask,
                      max_new_tokens=max_new_tokens,
                      temperature=temperature,
@@ -532,6 +225,7 @@ class EndpointHandler:
                      eos_token_id=self.tokenizer.eos_token_id
                  )

              generated_ids = outputs[0][input_ids.shape[-1]:]
              generated_text = self.tokenizer.decode(
                  generated_ids,
@@ -539,56 +233,13 @@ class EndpointHandler:
                  clean_up_tokenization_spaces=True
              )

-             # Clean up any remaining stop sequences
-             for stop_seq in stop_sequences:
-                 if generated_text.endswith(stop_seq):
-                     generated_text = generated_text[:-len(stop_seq)].rstrip()
-
-             success = True
-             result = {
-                 "generated_text": generated_text.strip(),
-                 "model": "PULSE-7B",
-                 "processing_method": "manual_text_only"
-             }
-
-             # Add Turkish commentary if requested
-             result = self.add_turkish_commentary(result, enable_turkish_commentary, deepseek_timeout)
-
-             # Log performance metrics
-             if UTILS_AVAILABLE:
-                 generation_time = time.time() - start_time
-                 performance_monitor.log_request(
-                     request_type, success, generation_time, image_processing_time
-                 )
-
-             return [result]
-
-         # If we reach here, no model is available
-         else:
-             print("❌ No model available for generation")
-             return [{
-                 "generated_text": "",
-                 "error": "No model available for generation - all loading methods failed",
-                 "model": "PULSE-7B",
-                 "processing_method": "none",
-                 "success": False
-             }]

          except Exception as e:
-             error_msg = f"Generation error: {str(e)}"
              print(f"❌ {error_msg}")
-
-             # Log failed request
-             if UTILS_AVAILABLE:
-                 generation_time = time.time() - start_time
-                 performance_monitor.log_request(
-                     request_type, success, generation_time, image_processing_time
-                 )
-
              return [{
                  "generated_text": "",
                  "error": error_msg,
-                 "model": "PULSE-7B",
-                 "handler": "Ubden® Team Enhanced Handler",
-                 "success": False
              }]
 
  from io import BytesIO
  from PIL import Image
  import requests


  class EndpointHandler:
      def __init__(self, path=""):
          """
          Hey there! Let's get this PULSE-7B model up and running.
+         We'll load it from the HuggingFace hub directly, so no worries about local files.

          Args:
+             path: Model directory path (we actually ignore this and load from HF hub)
          """
          print("🚀 Starting up PULSE-7B handler...")
          print("📝 Enhanced by Ubden® Team - github.com/ck-cankurt")

          # Let's see what hardware we're working with
          self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
          print(f"🖥️ Running on: {self.device}")

          try:
+             # First attempt - using pipeline (easiest and most stable way)
+             from transformers import pipeline
+
+             print("📦 Fetching model from HuggingFace Hub...")
+             self.pipe = pipeline(
+                 "text-generation",
+                 model="PULSE-ECG/PULSE-7B",
+                 torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                 device=0 if torch.cuda.is_available() else -1,
+                 trust_remote_code=True,
+                 model_kwargs={
+                     "low_cpu_mem_usage": True,
+                     "use_safetensors": True
+                 }
+             )
+             print("✅ Model loaded successfully via pipeline!")

+         except Exception as e:
+             print(f"⚠️ Pipeline didn't work out: {e}")
+             print("🔄 Let me try a different approach...")

              try:
+                 # Plan B - load model and tokenizer separately
+                 from transformers import AutoTokenizer, LlamaForCausalLM

+                 # Get the tokenizer ready
+                 print("📖 Setting up tokenizer...")
+                 self.tokenizer = AutoTokenizer.from_pretrained(
+                     "PULSE-ECG/PULSE-7B",
+                     trust_remote_code=True
+                 )

+                 # Load the model as Llama (it works, trust me!)
+                 print("🧠 Loading the model as Llama...")
+                 self.model = LlamaForCausalLM.from_pretrained(
+                     "PULSE-ECG/PULSE-7B",
                      torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                     device_map="auto",
+                     low_cpu_mem_usage=True,
                      trust_remote_code=True
                  )

+                 # Quick fix for padding token if it's missing
+                 if self.tokenizer.pad_token is None:
+                     self.tokenizer.pad_token = self.tokenizer.eos_token
+                     self.tokenizer.pad_token_id = self.tokenizer.eos_token_id

+                 self.model.eval()
+                 self.use_pipeline = False
+                 print("✅ Model loaded successfully via direct loading!")

+             except Exception as e2:
+                 print(f"😓 That didn't work either: {e2}")
+                 # If all else fails, we'll handle it gracefully
+                 self.pipe = None
                  self.model = None
                  self.tokenizer = None
                  self.use_pipeline = None
          else:
+             self.use_pipeline = True

      def process_image_input(self, image_input):
          """

          Returns:
              PIL Image object or None if something goes wrong
          """
          try:
              # Check if it's a URL (starts with http/https)
+             if isinstance(image_input, str) and (image_input.startswith('http://') or image_input.startswith('https://')):
                  print(f"🌐 Fetching image from URL: {image_input[:50]}...")
+                 response = requests.get(image_input, timeout=10)
                  response.raise_for_status()
                  image = Image.open(BytesIO(response.content)).convert('RGB')
+                 print("✅ Image downloaded successfully!")
                  return image

+             # Must be base64 then
+             elif isinstance(image_input, str):
+                 print("🔍 Decoding base64 image...")
+                 # Remove the data URL prefix if it exists
+                 if "base64," in image_input:
+                     image_input = image_input.split("base64,")[1]

+                 image_data = base64.b64decode(image_input)
                  image = Image.open(BytesIO(image_data)).convert('RGB')
+                 print("✅ Image decoded successfully!")
                  return image

          except Exception as e:
+             print(f"❌ Couldn't process the image: {e}")
              return None

          return None

      def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
          """
          Main processing function - where the magic happens!

              List with the generated response
          """
          # Quick check - is our model ready?
+         if self.use_pipeline is None:
              return [{
                  "generated_text": "Oops! Model couldn't load properly. Please check the deployment settings.",
+                 "error": "Model initialization failed",
                  "handler": "Ubden® Team Enhanced Handler"
              }]

          try:
              # Parse the inputs - flexible format support
              inputs = data.get("inputs", "")

              if isinstance(inputs, dict):
                  # Dictionary input - check for text and image
+                 text = inputs.get("text", inputs.get("prompt", str(inputs)))

                  # Check for image in various formats
                  image_input = inputs.get("image", inputs.get("image_url", inputs.get("image_base64", None)))
                  if image_input:
                      image = self.process_image_input(image_input)
                      if image:
+                         # For now, we'll add a note about the image since we're text-only
+                         text = f"[Image provided - {image.size[0]}x{image.size[1]} pixels] {text}"
              else:
                  # Simple string input
                  text = str(inputs)

              # Get generation parameters with sensible defaults
              parameters = data.get("parameters", {})
+             max_new_tokens = min(parameters.get("max_new_tokens", 256), 1024)
+             temperature = parameters.get("temperature", 0.7)
+             top_p = parameters.get("top_p", 0.95)
+             do_sample = parameters.get("do_sample", True)
+             repetition_penalty = parameters.get("repetition_penalty", 1.0)

              # Using pipeline? Let's go!
+             if self.use_pipeline:
+                 result = self.pipe(
+                     text,
+                     max_new_tokens=max_new_tokens,
+                     temperature=temperature,
+                     top_p=top_p,
+                     do_sample=do_sample,
+                     repetition_penalty=repetition_penalty,
+                     return_full_text=False  # Just the new stuff, not the input
+                 )

+                 # Pipeline returns a list, let's handle it
                  if isinstance(result, list) and len(result) > 0:
+                     return [{"generated_text": result[0].get("generated_text", "")}]
                  else:
+                     return [{"generated_text": str(result)}]

+             # Manual generation mode
+             else:
+                 # Tokenize the input
                  encoded = self.tokenizer(
                      text,
                      return_tensors="pt",
                      truncation=True,
+                     max_length=2048
                  )

                  input_ids = encoded["input_ids"].to(self.device)
                  if attention_mask is not None:
                      attention_mask = attention_mask.to(self.device)

+                 # Generate the response
                  with torch.no_grad():
                      outputs = self.model.generate(
+                         input_ids,
                          attention_mask=attention_mask,
                          max_new_tokens=max_new_tokens,
                          temperature=temperature,
                          eos_token_id=self.tokenizer.eos_token_id
                      )

+                 # Decode only the new tokens (not the input)
                  generated_ids = outputs[0][input_ids.shape[-1]:]
                  generated_text = self.tokenizer.decode(
                      generated_ids,
                      clean_up_tokenization_spaces=True
                  )

+                 return [{"generated_text": generated_text}]

          except Exception as e:
+             error_msg = f"Something went wrong during generation: {str(e)}"
              print(f"❌ {error_msg}")
              return [{
                  "generated_text": "",
                  "error": error_msg,
+                 "handler": "Ubden® Team Enhanced Handler"
              }]
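
For reference, here is a minimal sketch of how the updated handler could be exercised. The payload shape follows the parsing in the new __call__ above (string or dict "inputs" with "text"/"prompt" and optional "image"/"image_url"/"image_base64", plus "parameters"); the local import, the example prompt, and the image URL are illustrative assumptions, not part of this commit.

# Hypothetical local smoke test for the simplified handler
# (instantiating it downloads PULSE-ECG/PULSE-7B from the Hub on first run).
from handler import EndpointHandler

handler = EndpointHandler()

# Plain string input.
print(handler({"inputs": "Describe the typical ECG findings in atrial fibrillation."}))

# Dict input: text plus an optional image reference. In this version the image
# is only noted in the prompt text (text-only mode), not fed to the model.
payload = {
    "inputs": {
        "text": "What does this ECG show?",
        "image_url": "https://example.com/ecg.png"  # illustrative URL
    },
    "parameters": {
        "max_new_tokens": 256,   # capped at 1024 by the handler
        "temperature": 0.7,
        "top_p": 0.95
    }
}
print(handler(payload))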