ubden committed
Commit 717bb83 · verified · Parent: b774ce8

Upload 16 files

Files changed (2):
  1. handler.py +133 -4
  2. working-handler.py +245 -0
handler.py CHANGED
@@ -10,6 +10,23 @@ import base64
 from io import BytesIO
 from PIL import Image
 import requests
+import time
+
+# Import utilities if available
+try:
+    from utils import (
+        performance_monitor,
+        validate_image_input,
+        sanitize_parameters,
+        get_system_info,
+        create_health_check,
+        deepseek_client
+    )
+    UTILS_AVAILABLE = True
+except ImportError:
+    UTILS_AVAILABLE = False
+    deepseek_client = None
+    print("⚠️ Utils module not found - performance monitoring and DeepSeek integration disabled")
 
 
 class EndpointHandler:
@@ -23,6 +40,28 @@ class EndpointHandler:
         """
         print("🚀 Starting up PULSE-7B handler...")
         print("📝 Enhanced by Ubden® Team - github.com/ck-cankurt")
+        import sys
+        print(f"🔧 Python version: {sys.version}")
+        print(f"🔧 PyTorch version: {torch.__version__}")
+
+        # Check transformers version
+        try:
+            import transformers
+            print(f"🔧 Transformers version: {transformers.__version__}")
+
+            # PULSE LLaVA works with transformers==4.37.2
+            if transformers.__version__ == "4.37.2":
+                print("✅ Using PULSE LLaVA compatible version (4.37.2)")
+            elif "dev" in transformers.__version__ or "git" in str(transformers.__version__):
+                print("⚠️ Using development version - may conflict with PULSE LLaVA")
+            else:
+                print("⚠️ Using different version - PULSE LLaVA prefers 4.37.2")
+        except Exception as e:
+            print(f"❌ Error checking transformers version: {e}")
+
+        print(f"🔧 CUDA available: {torch.cuda.is_available()}")
+        if torch.cuda.is_available():
+            print(f"🔧 CUDA device: {torch.cuda.get_device_name(0)}")
 
         # Let's see what hardware we're working with
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -89,6 +128,19 @@ class EndpointHandler:
                 self.use_pipeline = None
         else:
            self.use_pipeline = True
+
+        # Final status report
+        print("\n🔍 Model Loading Status Report:")
+        print(f"  - use_pipeline: {self.use_pipeline}")
+        print(f"  - model: {'✅ Loaded' if self.model is not None else '❌ None'}")
+        print(f"  - processor: {'✅ Loaded' if self.processor is not None else '❌ None'}")
+        print(f"  - tokenizer: {'✅ Loaded' if self.tokenizer is not None else '❌ None'}")
+        print(f"  - pipe: {'✅ Loaded' if self.pipe is not None else '❌ None'}")
+
+        if all(x is None for x in [self.model, self.processor, self.tokenizer, self.pipe]):
+            print("💥 CRITICAL: No model components loaded successfully!")
+        else:
+            print("✅ At least one model component loaded successfully")
 
     def process_image_input(self, image_input):
         """
@@ -128,6 +180,56 @@ class EndpointHandler:
 
         return None
 
+    def add_turkish_commentary(self, response: Dict[str, Any], enable_commentary: bool, timeout: int = 30) -> Dict[str, Any]:
+        """Add Turkish commentary to the response using DeepSeek API"""
+        if not enable_commentary:
+            return response
+
+        if not UTILS_AVAILABLE or not deepseek_client:
+            print("⚠️ DeepSeek client not available - skipping Turkish commentary")
+            response["commentary_status"] = "unavailable"
+            return response
+
+        if not deepseek_client.is_available():
+            print("⚠️ DeepSeek API key not configured - skipping Turkish commentary")
+            response["commentary_status"] = "api_key_missing"
+            return response
+
+        generated_text = response.get("generated_text", "")
+        if not generated_text:
+            print("⚠️ No generated text to comment on")
+            response["commentary_status"] = "no_text"
+            return response
+
+        print("🔄 DeepSeek ile Türkçe yorum ekleniyor...")
+        commentary_result = deepseek_client.get_turkish_commentary(generated_text, timeout)
+
+        if commentary_result["success"]:
+            response["comment_text"] = commentary_result["comment_text"]
+            response["commentary_model"] = commentary_result.get("model", "deepseek-chat")
+            response["commentary_tokens"] = commentary_result.get("tokens_used", 0)
+            response["commentary_status"] = "success"
+            print("✅ Türkçe yorum başarıyla eklendi")
+        else:
+            response["comment_text"] = ""
+            response["commentary_error"] = commentary_result["error"]
+            response["commentary_status"] = "failed"
+            print(f"❌ Türkçe yorum eklenemedi: {commentary_result['error']}")
+
+        return response
+
+    def health_check(self) -> Dict[str, Any]:
+        """Health check endpoint"""
+        if UTILS_AVAILABLE:
+            return create_health_check()
+        else:
+            return {
+                'status': 'healthy',
+                'model': 'PULSE-7B',
+                'timestamp': time.time(),
+                'handler_version': '2.0.0'
+            }
+
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
         Main processing function - where the magic happens!
@@ -154,7 +256,8 @@ class EndpointHandler:
 
             if isinstance(inputs, dict):
                 # Dictionary input - check for text and image
-                text = inputs.get("text", inputs.get("prompt", str(inputs)))
+                # Support query field (new) plus original text/prompt fields
+                text = inputs.get("query", inputs.get("text", inputs.get("prompt", str(inputs))))
 
                 # Check for image in various formats
                 image_input = inputs.get("image", inputs.get("image_url", inputs.get("image_base64", None)))
@@ -178,6 +281,9 @@ class EndpointHandler:
             do_sample = parameters.get("do_sample", True)
             repetition_penalty = parameters.get("repetition_penalty", 1.0)
 
+            # Check if Turkish commentary is requested (NEW FEATURE)
+            enable_turkish_commentary = parameters.get("enable_turkish_commentary", False)  # Default false
+
             # Using pipeline? Let's go!
             if self.use_pipeline:
                 result = self.pipe(
@@ -192,9 +298,24 @@ class EndpointHandler:
 
                 # Pipeline returns a list, let's handle it
                 if isinstance(result, list) and len(result) > 0:
-                    return [{"generated_text": result[0].get("generated_text", "")}]
+                    generated_text = result[0].get("generated_text", "")
+
+                    # Create response
+                    response = {"generated_text": generated_text}
+
+                    # Add Turkish commentary if requested (NEW FEATURE)
+                    if enable_turkish_commentary:
+                        response = self.add_turkish_commentary(response, True)
+
+                    return [response]
                 else:
-                    return [{"generated_text": str(result)}]
+                    response = {"generated_text": str(result)}
+
+                    # Add Turkish commentary if requested (NEW FEATURE)
+                    if enable_turkish_commentary:
+                        response = self.add_turkish_commentary(response, True)
+
+                    return [response]
 
             # Manual generation mode
             else:
@@ -233,7 +354,15 @@ class EndpointHandler:
                     clean_up_tokenization_spaces=True
                 )
 
-                return [{"generated_text": generated_text}]
+                # Create response
+                response = {"generated_text": generated_text}
+
+                # Add Turkish commentary if requested (NEW FEATURE)
+                if enable_turkish_commentary:
+                    response = self.add_turkish_commentary(response, True)
+
+                return [response]
+
 
         except Exception as e:
             error_msg = f"Something went wrong during generation: {str(e)}"
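
In summary, this change adds startup diagnostics, a health_check() helper, a new "query" input field, and an optional Turkish commentary step controlled by the enable_turkish_commentary parameter. A hedged client-side sketch follows; the endpoint URL and token are placeholders, and the payload fields and response keys are taken from the diff above.

# Hypothetical client call against a deployed endpoint running the updated handler.py.
# ENDPOINT_URL and HF_TOKEN are placeholders; the payload fields mirror the diff above.
import requests

ENDPOINT_URL = "https://<your-endpoint>.endpoints.huggingface.cloud"
HF_TOKEN = "hf_..."

payload = {
    "inputs": {
        "query": "What abnormalities are visible in this ECG?",   # new field; text/prompt still work
        "image_url": "https://example.com/ecg.png"                # image, image_url, or image_base64
    },
    "parameters": {
        "max_new_tokens": 256,
        "temperature": 0.7,
        "enable_turkish_commentary": True   # new flag; requires utils.deepseek_client and an API key
    }
}

resp = requests.post(
    ENDPOINT_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}", "Content-Type": "application/json"},
    json=payload,
    timeout=120,
)
print(resp.json())
# When commentary succeeds, the first element carries "generated_text" plus
# "comment_text", "commentary_model", "commentary_tokens", and "commentary_status".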
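
The new imports reference a utils module that is not part of this two-file view (presumably among the other uploaded files). The sketch below shows the minimal surface that module would need for handler.py to run: the imported names and the result keys consumed by add_turkish_commentary (success, comment_text, model, tokens_used, error) come from the diff, while the DeepSeek endpoint URL, the DEEPSEEK_API_KEY environment variable, and the prompt wording are assumptions.

# Hypothetical utils.py sketch, shaped only by the calls handler.py makes.
# Endpoint URL, env var name, and prompt wording are assumptions.
import os
import time
import requests


def performance_monitor(fn):          # no-op stand-in; real implementation not shown in this commit
    return fn


def validate_image_input(image_input):  # permissive stand-in
    return True


def sanitize_parameters(params):      # pass-through stand-in
    return dict(params or {})


def get_system_info():
    return {"cuda_visible_devices": os.environ.get("CUDA_VISIBLE_DEVICES", "")}


def create_health_check():
    # Mirrors the fallback dict in EndpointHandler.health_check()
    return {"status": "healthy", "model": "PULSE-7B", "timestamp": time.time(), "handler_version": "2.0.0"}


class _DeepSeekClient:
    def __init__(self):
        self.api_key = os.environ.get("DEEPSEEK_API_KEY", "")  # assumed variable name

    def is_available(self) -> bool:
        return bool(self.api_key)

    def get_turkish_commentary(self, text: str, timeout: int = 30) -> dict:
        try:
            resp = requests.post(
                "https://api.deepseek.com/chat/completions",  # assumed OpenAI-compatible endpoint
                headers={"Authorization": f"Bearer {self.api_key}"},
                json={
                    "model": "deepseek-chat",
                    "messages": [
                        {"role": "system", "content": "Summarize and comment on the following ECG interpretation in Turkish."},
                        {"role": "user", "content": text},
                    ],
                },
                timeout=timeout,
            )
            resp.raise_for_status()
            body = resp.json()
            return {
                "success": True,
                "comment_text": body["choices"][0]["message"]["content"],
                "model": body.get("model", "deepseek-chat"),
                "tokens_used": body.get("usage", {}).get("total_tokens", 0),
            }
        except Exception as exc:
            return {"success": False, "error": str(exc)}


deepseek_client = _DeepSeekClient()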
working-handler.py ADDED
@@ -0,0 +1,245 @@
+"""
+PULSE-7B Enhanced Handler
+Ubden® Team - Edited by https://github.com/ck-cankurt
+Support: Text, Image URLs, and Base64 encoded images
+"""
+
+import torch
+from typing import Dict, List, Any
+import base64
+from io import BytesIO
+from PIL import Image
+import requests
+
+
+class EndpointHandler:
+    def __init__(self, path=""):
+        """
+        Hey there! Let's get this PULSE-7B model up and running.
+        We'll load it from the HuggingFace hub directly, so no worries about local files.
+
+        Args:
+            path: Model directory path (we actually ignore this and load from HF hub)
+        """
+        print("🚀 Starting up PULSE-7B handler...")
+        print("📝 Enhanced by Ubden® Team - github.com/ck-cankurt")
+
+        # Let's see what hardware we're working with
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        print(f"🖥️ Running on: {self.device}")
+
+        try:
+            # First attempt - using pipeline (easiest and most stable way)
+            from transformers import pipeline
+
+            print("📦 Fetching model from HuggingFace Hub...")
+            self.pipe = pipeline(
+                "text-generation",
+                model="PULSE-ECG/PULSE-7B",
+                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                device=0 if torch.cuda.is_available() else -1,
+                trust_remote_code=True,
+                model_kwargs={
+                    "low_cpu_mem_usage": True,
+                    "use_safetensors": True
+                }
+            )
+            print("✅ Model loaded successfully via pipeline!")
+
+        except Exception as e:
+            print(f"⚠️ Pipeline didn't work out: {e}")
+            print("🔄 Let me try a different approach...")
+
+            try:
+                # Plan B - load model and tokenizer separately
+                from transformers import AutoTokenizer, LlamaForCausalLM
+
+                # Get the tokenizer ready
+                print("📖 Setting up tokenizer...")
+                self.tokenizer = AutoTokenizer.from_pretrained(
+                    "PULSE-ECG/PULSE-7B",
+                    trust_remote_code=True
+                )
+
+                # Load the model as Llama (it works, trust me!)
+                print("🧠 Loading the model as Llama...")
+                self.model = LlamaForCausalLM.from_pretrained(
+                    "PULSE-ECG/PULSE-7B",
+                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                    device_map="auto",
+                    low_cpu_mem_usage=True,
+                    trust_remote_code=True
+                )
+
+                # Quick fix for padding token if it's missing
+                if self.tokenizer.pad_token is None:
+                    self.tokenizer.pad_token = self.tokenizer.eos_token
+                    self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
+
+                self.model.eval()
+                self.use_pipeline = False
+                print("✅ Model loaded successfully via direct loading!")
+
+            except Exception as e2:
+                print(f"😓 That didn't work either: {e2}")
+                # If all else fails, we'll handle it gracefully
+                self.pipe = None
+                self.model = None
+                self.tokenizer = None
+                self.use_pipeline = None
+        else:
+            self.use_pipeline = True
+
+    def process_image_input(self, image_input):
+        """
+        Handle both URL and base64 image inputs like a champ!
+
+        Args:
+            image_input: Can be a URL string or base64 encoded image
+
+        Returns:
+            PIL Image object or None if something goes wrong
+        """
+        try:
+            # Check if it's a URL (starts with http/https)
+            if isinstance(image_input, str) and (image_input.startswith('http://') or image_input.startswith('https://')):
+                print(f"🌐 Fetching image from URL: {image_input[:50]}...")
+                response = requests.get(image_input, timeout=10)
+                response.raise_for_status()
+                image = Image.open(BytesIO(response.content)).convert('RGB')
+                print("✅ Image downloaded successfully!")
+                return image
+
+            # Must be base64 then
+            elif isinstance(image_input, str):
+                print("🔍 Decoding base64 image...")
+                # Remove the data URL prefix if it exists
+                if "base64," in image_input:
+                    image_input = image_input.split("base64,")[1]
+
+                image_data = base64.b64decode(image_input)
+                image = Image.open(BytesIO(image_data)).convert('RGB')
+                print("✅ Image decoded successfully!")
+                return image
+
+        except Exception as e:
+            print(f"❌ Couldn't process the image: {e}")
+            return None
+
+        return None
+
+    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """
+        Main processing function - where the magic happens!
+
+        Args:
+            data: Input data with 'inputs' and optional 'parameters'
+
+        Returns:
+            List with the generated response
+        """
+        # Quick check - is our model ready?
+        if self.use_pipeline is None:
+            return [{
+                "generated_text": "Oops! Model couldn't load properly. Please check the deployment settings.",
+                "error": "Model initialization failed",
+                "handler": "Ubden® Team Enhanced Handler"
+            }]
+
+        try:
+            # Parse the inputs - flexible format support
+            inputs = data.get("inputs", "")
+            text = ""
+            image = None
+
+            if isinstance(inputs, dict):
+                # Dictionary input - check for text and image
+                text = inputs.get("text", inputs.get("prompt", str(inputs)))
+
+                # Check for image in various formats
+                image_input = inputs.get("image", inputs.get("image_url", inputs.get("image_base64", None)))
+                if image_input:
+                    image = self.process_image_input(image_input)
+                    if image:
+                        # For now, we'll add a note about the image since we're text-only
+                        text = f"[Image provided - {image.size[0]}x{image.size[1]} pixels] {text}"
+            else:
+                # Simple string input
+                text = str(inputs)
+
+            if not text:
+                return [{"generated_text": "Hey, I need some text to work with! Please provide an input."}]
+
+            # Get generation parameters with sensible defaults
+            parameters = data.get("parameters", {})
+            max_new_tokens = min(parameters.get("max_new_tokens", 256), 1024)
+            temperature = parameters.get("temperature", 0.7)
+            top_p = parameters.get("top_p", 0.95)
+            do_sample = parameters.get("do_sample", True)
+            repetition_penalty = parameters.get("repetition_penalty", 1.0)
+
+            # Using pipeline? Let's go!
+            if self.use_pipeline:
+                result = self.pipe(
+                    text,
+                    max_new_tokens=max_new_tokens,
+                    temperature=temperature,
+                    top_p=top_p,
+                    do_sample=do_sample,
+                    repetition_penalty=repetition_penalty,
+                    return_full_text=False  # Just the new stuff, not the input
+                )
+
+                # Pipeline returns a list, let's handle it
+                if isinstance(result, list) and len(result) > 0:
+                    return [{"generated_text": result[0].get("generated_text", "")}]
+                else:
+                    return [{"generated_text": str(result)}]
+
+            # Manual generation mode
+            else:
+                # Tokenize the input
+                encoded = self.tokenizer(
+                    text,
+                    return_tensors="pt",
+                    truncation=True,
+                    max_length=2048
+                )
+
+                input_ids = encoded["input_ids"].to(self.device)
+                attention_mask = encoded.get("attention_mask")
+                if attention_mask is not None:
+                    attention_mask = attention_mask.to(self.device)
+
+                # Generate the response
+                with torch.no_grad():
+                    outputs = self.model.generate(
+                        input_ids,
+                        attention_mask=attention_mask,
+                        max_new_tokens=max_new_tokens,
+                        temperature=temperature,
+                        top_p=top_p,
+                        do_sample=do_sample,
+                        repetition_penalty=repetition_penalty,
+                        pad_token_id=self.tokenizer.pad_token_id,
+                        eos_token_id=self.tokenizer.eos_token_id
+                    )
+
+                # Decode only the new tokens (not the input)
+                generated_ids = outputs[0][input_ids.shape[-1]:]
+                generated_text = self.tokenizer.decode(
+                    generated_ids,
+                    skip_special_tokens=True,
+                    clean_up_tokenization_spaces=True
+                )
+
+                return [{"generated_text": generated_text}]
+
+        except Exception as e:
+            error_msg = f"Something went wrong during generation: {str(e)}"
+            print(f"❌ {error_msg}")
+            return [{
+                "generated_text": "",
+                "error": error_msg,
+                "handler": "Ubden® Team Enhanced Handler"
+            }]
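
working-handler.py appears to mirror the pre-commit handler (no utils import, no Turkish commentary), so it can be exercised on its own. A minimal local smoke test is sketched below; the module is loaded by file path because of the hyphen in the filename, and the first call downloads PULSE-ECG/PULSE-7B from the Hub, so a GPU and several GB of disk are assumed.

# Minimal smoke test, assuming working-handler.py sits in the current directory.
import importlib.util

spec = importlib.util.spec_from_file_location("working_handler", "working-handler.py")
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)

handler = module.EndpointHandler()
result = handler({
    "inputs": {
        "text": "Describe the key features of a normal sinus rhythm.",
        # "image_url": "https://example.com/ecg.png",  # optional; URLs and base64 are both accepted
    },
    "parameters": {"max_new_tokens": 128, "temperature": 0.7},
})
print(result[0]["generated_text"])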