lewishamilton21 committed
Commit a76635d · verified · Parent: f7bfdc6

Upload 3 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Multilingual_best_replies_train.jsonl filter=lfs diff=lfs merge=lfs -text
Multilingual Dataset.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
Multilingual_best_replies_train.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1f618537b3577f7863450b6f2e9121a2dc783f00a29e47cc7500e4ec7d646101
+ size 20877686
qwen_1_5b_llm_fine_tuned_chatbot.py ADDED
@@ -0,0 +1,233 @@
+ # -*- coding: utf-8 -*-
+ """Qwen 1.5B LLM fine-tuned Chatbot.ipynb
+ 
+ Automatically generated by Colab.
+ 
+ Original file is located at
+     https://colab.research.google.com/drive/1zLLSY8_XQNi5ftb-2otUN-SVr6zaYTih
+ """
+ 
+ !pip install -U transformers datasets peft accelerate
+ !pip install -U bitsandbytes --force-reinstall
+ 
+ """Model page: https://huggingface.co/Gensyn/Qwen2.5-1.5B-Instruct
+ 
+ ⚠️ If the generated code snippets do not work, please open an issue on either the [model repo](https://huggingface.co/Gensyn/Qwen2.5-1.5B-Instruct)
+ or on [huggingface.js](https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries-snippets.ts) 🙏
+ """
+ 
+ # Use a pipeline as a high-level helper
+ from transformers import pipeline
+ 
+ pipe = pipeline("text-generation", model="Gensyn/Qwen2.5-1.5B-Instruct")
+ messages = [
+     {"role": "user", "content": "Who are you?"},
+ ]
+ pipe(messages)
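+ 
+ # Note: passing a list of chat messages ({"role": ..., "content": ...} dicts) to the
+ # text-generation pipeline lets it apply the model's chat template before generating.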
+ 
+ # Load model directly
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
+ from transformers import BitsAndBytesConfig
+ from datasets import load_dataset
+ from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftModel
+ import torch
+ import gradio as gr
+ 
+ model_name = "Gensyn/Qwen2.5-1.5B-Instruct"
+ 
+ # 4-bit NF4 quantization for the frozen base model
+ quantization_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_compute_dtype=torch.bfloat16
+ )
+ 
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     quantization_config=quantization_config,
+     device_map="auto"
+ )
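+ 
+ # NF4 with double quantization stores the frozen base weights in roughly 4 bits each,
+ # so the 1.5B-parameter model fits comfortably on a single Colab GPU.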
+ 
+ # Save memory during training: disable the KV cache and enable gradient checkpointing
+ model.config.use_cache = False
+ model.gradient_checkpointing_enable()
+ 
+ # Optional: pass an access token if the model repo requires authentication.
+ # Note that these two calls reload the model in full precision and override the
+ # 4-bit model loaded above; they can be skipped for public repos.
+ token = ""
+ tokenizer = AutoTokenizer.from_pretrained("Gensyn/Qwen2.5-1.5B-Instruct", token=token)
+ model = AutoModelForCausalLM.from_pretrained("Gensyn/Qwen2.5-1.5B-Instruct", token=token)
+ 
+ # Load the tokenizer and the quantized model actually used for fine-tuning
+ model_name = "Gensyn/Qwen2.5-1.5B-Instruct"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     quantization_config=quantization_config,
+     device_map="auto"
+ )
+ 
+ # Prepare the quantized model for LoRA fine-tuning
+ model = prepare_model_for_kbit_training(model)
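+ 
+ # prepare_model_for_kbit_training freezes the base weights, upcasts the remaining
+ # non-quantized layers to float32 and enables input gradients, so that only the
+ # LoRA adapters added below receive updates.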
+ 
+ # LoRA configuration
+ lora_config = LoraConfig(
+     r=16,
+     lora_alpha=32,
+     target_modules=["q_proj", "v_proj"],
+     lora_dropout=0.05,
+     bias="none",
+     task_type="CAUSAL_LM"
+ )
+ 
+ # Apply LoRA adapters
+ model = get_peft_model(model, lora_config)
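+ 
+ # Rank-16 adapters on the attention q_proj/v_proj projections train only a tiny
+ # fraction of the network; model.print_trainable_parameters() reports the exact count.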
+ 
+ # Load a tiny example dataset (0.5% of the OpenAssistant Guanaco training split)
+ dataset = load_dataset("timdettmers/openassistant-guanaco", split="train")
+ dataset = dataset.select(range(int(len(dataset) * 0.005)))
+ 
+ # Tokenize data
+ def tokenize_function(examples):
+     tokenized_examples = tokenizer(examples["text"], truncation=True, max_length=512, padding="max_length", return_tensors="pt")
+     tokenized_examples["labels"] = tokenized_examples["input_ids"].clone()
+     return tokenized_examples
+ 
+ tokenized_datasets = dataset.map(tokenize_function, batched=True, remove_columns=["text"])
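+ 
+ # For causal LM fine-tuning the labels are a copy of input_ids; the model shifts
+ # them internally so the loss is standard next-token prediction.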
+ 
+ # Training arguments
+ training_args = TrainingArguments(
+     output_dir="./peft-lora-output",
+     per_device_train_batch_size=4,
+     num_train_epochs=3,
+     learning_rate=2e-4,
+     fp16=True,
+     logging_steps=10,
+     save_steps=50,
+     save_total_limit=1,
+     report_to="none"
+ )
+ 
+ # Trainer setup
+ trainer = Trainer(
+     model=model,
+     args=training_args,
+     train_dataset=tokenized_datasets
+ )
+ 
+ # Start training
+ trainer.train()
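+ 
+ # With only ~0.5% of the dataset, this run is a quick smoke test of the LoRA setup
+ # rather than a full fine-tune; use more data (and steps) for a usable chatbot.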
+ 
+ # Save LoRA adapter weights
+ model.save_pretrained("./peft-lora-output")
+ tokenizer.save_pretrained("./peft-lora-output")
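+ 
+ # save_pretrained on a PEFT model writes only the small adapter weights and config,
+ # not the full base model, which is why the base checkpoint is reloaded below.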
+ 
+ from transformers import pipeline
+ 
+ # Load base model in 4-bit again
+ base_model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     quantization_config=quantization_config,
+     device_map="auto"
+ )
+ 
+ # Load LoRA adapters into the base model
+ peft_model = PeftModel.from_pretrained(base_model, "./peft-lora-output")
+ 
+ # Load the tokenizer saved alongside the adapter
+ tokenizer = AutoTokenizer.from_pretrained("./peft-lora-output")
+ 
+ # Text generation pipeline using base model + LoRA
+ pipe = pipeline("text-generation", model=peft_model, tokenizer=tokenizer)
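+ 
+ # The adapters are kept separate from the quantized base weights and applied on top
+ # during each forward pass; nothing is merged back into the checkpoint.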
+ 
+ # Example prompt (Chinese: "What is artificial intelligence?")
+ prompt = "什么是人工智能"
+ 
+ # Generate output
+ output = pipe(prompt, max_length=100, do_sample=True, temperature=0.7)
+ 
+ # Display generated text
+ print("Prompt:\n", prompt)
+ print("\nGenerated Text:\n", output[0]['generated_text'])
+ 
+ !pip install gradio
+ 
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ from peft import PeftModel
+ import torch
+ import gradio as gr
+ 
+ # Model path (base or LoRA adapter checkpoint)
+ model_name = "Gensyn/Qwen2.5-1.5B-Instruct"
+ 
+ # 4-bit quantization config
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_compute_dtype=torch.float16,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_type="nf4"
+ )
+ 
+ # Load tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ 
+ # Load model with quantization and device mapping
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     quantization_config=bnb_config,
+     device_map="auto"
+ )
+ 
+ # If using the LoRA adapter trained above, optionally attach it:
+ # model = PeftModel.from_pretrained(model, "path/to/lora-adapter")
+ 
+ # Function to handle multi-turn chat
+ def chat(user_input, chat_history):
+     # Rebuild the full conversation context from previous (user, assistant) turns
+     full_prompt = ""
+     for user_msg, assistant_msg in chat_history:
+         full_prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
+     full_prompt += f"User: {user_input}\nAssistant:"
+ 
+     # Tokenize and generate a response
+     inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
+     outputs = model.generate(
+         **inputs,
+         max_new_tokens=200,
+         do_sample=True,
+         temperature=0.7,
+         top_p=0.9,
+         pad_token_id=tokenizer.eos_token_id
+     )
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+ 
+     # Extract just the assistant's latest reply
+     assistant_reply = response.split("Assistant:")[-1].strip()
+ 
+     # Append the new (user, assistant) pair so gr.Chatbot can render it
+     chat_history.append((user_input, assistant_reply))
+ 
+     # Clear the textbox and return the updated history for both the state and the display
+     return "", chat_history, chat_history
+ 
+ # Gradio UI
+ with gr.Blocks() as demo:
+     gr.Markdown("# 🗨️ DeepSeek / Qwen LoRA Chatbot")
+ 
+     chatbot = gr.Chatbot(label="Multilingual LLM chatbot")
+     user_input = gr.Textbox(label="Your message", lines=2, placeholder="Type a message and press Enter...")
+     send_btn = gr.Button("Send")
+ 
+     state = gr.State([])  # Store chat history as (user, assistant) pairs
+ 
+     send_btn.click(fn=chat, inputs=[user_input, state], outputs=[user_input, state, chatbot])
+     user_input.submit(fn=chat, inputs=[user_input, state], outputs=[user_input, state, chatbot])
+ 
+ # Launch app
+ demo.launch(share=True)
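+ 
+ # share=True asks Gradio to create a temporary public URL for the demo, which is
+ # convenient when running inside Colab.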