lewishamilton21 committed
Commit a76635d · verified · Parent: f7bfdc6

Upload 3 files

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ Multilingual_best_replies_train.jsonl filter=lfs diff=lfs merge=lfs -text
Multilingual Dataset.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
Multilingual_best_replies_train.jsonl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1f618537b3577f7863450b6f2e9121a2dc783f00a29e47cc7500e4ec7d646101
+ size 20877686
qwen_1_5b_llm_fine_tuned_chatbot.py ADDED
@@ -0,0 +1,233 @@
+ # -*- coding: utf-8 -*-
+ """Qwen 1.5B LLM fine-tuned Chatbot.ipynb
+ 
+ Automatically generated by Colab.
+ 
+ Original file is located at
+     https://colab.research.google.com/drive/1zLLSY8_XQNi5ftb-2otUN-SVr6zaYTih
+ """
+ 
+ !pip install -U transformers datasets peft accelerate
+ !pip install -U bitsandbytes --force-reinstall
+ 
+ """Model page: https://huggingface.co/Gensyn/Qwen2.5-1.5B-Instruct
+ 
+ ⚠️ If the generated code snippets do not work, please open an issue on either the [model repo](https://huggingface.co/Gensyn/Qwen2.5-1.5B-Instruct)
+ or on [huggingface.js](https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries-snippets.ts) 🙏
+ """
+ 
+ # Use a pipeline as a high-level helper
+ from transformers import pipeline
+ 
+ pipe = pipeline("text-generation", model="Gensyn/Qwen2.5-1.5B-Instruct")
+ messages = [
+     {"role": "user", "content": "Who are you?"},
+ ]
+ pipe(messages)
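+ 
+ # Note: passing a list of chat messages ({"role": ..., "content": ...} dicts) to the
+ # text-generation pipeline lets it apply the model's chat template before generating.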
+ 
+ # Load model directly
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
+ from transformers import BitsAndBytesConfig
+ from datasets import load_dataset
+ from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftModel
+ import torch
+ import gradio as gr
+ 
+ model_name = "Gensyn/Qwen2.5-1.5B-Instruct"
+ 
+ # 4-bit NF4 quantization for the frozen base model
+ quantization_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_compute_dtype=torch.bfloat16
+ )
+ 
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     quantization_config=quantization_config,
+     device_map="auto"
+ )
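+ 
+ # NF4 with double quantization stores the frozen base weights in roughly 4 bits each,
+ # so the 1.5B-parameter model fits comfortably on a single Colab GPU.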
+ 
+ # Save memory during training: disable the KV cache and enable gradient checkpointing
+ model.config.use_cache = False
+ model.gradient_checkpointing_enable()
+ 
+ # Optional: pass an access token if the model repo requires authentication.
+ # Note that these two calls reload the model in full precision and override the
+ # 4-bit model loaded above; they can be skipped for public repos.
+ token = ""
+ tokenizer = AutoTokenizer.from_pretrained("Gensyn/Qwen2.5-1.5B-Instruct", token=token)
+ model = AutoModelForCausalLM.from_pretrained("Gensyn/Qwen2.5-1.5B-Instruct", token=token)
+ 
+ # Load the tokenizer and the quantized model actually used for fine-tuning
+ model_name = "Gensyn/Qwen2.5-1.5B-Instruct"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     quantization_config=quantization_config,
+     device_map="auto"
+ )
+ 
+ # Prepare the quantized model for LoRA fine-tuning
+ model = prepare_model_for_kbit_training(model)
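+ 
+ # prepare_model_for_kbit_training freezes the base weights, upcasts the remaining
+ # non-quantized layers to float32 and enables input gradients, so that only the
+ # LoRA adapters added below receive updates.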
+ 
+ # LoRA configuration
+ lora_config = LoraConfig(
+     r=16,
+     lora_alpha=32,
+     target_modules=["q_proj", "v_proj"],
+     lora_dropout=0.05,
+     bias="none",
+     task_type="CAUSAL_LM"
+ )
+ 
+ # Apply LoRA adapters
+ model = get_peft_model(model, lora_config)
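+ 
+ # Rank-16 adapters on the attention q_proj/v_proj projections train only a tiny
+ # fraction of the network; model.print_trainable_parameters() reports the exact count.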
+ 
+ # Load a tiny example dataset (0.5% of the OpenAssistant Guanaco training split)
+ dataset = load_dataset("timdettmers/openassistant-guanaco", split="train")
+ dataset = dataset.select(range(int(len(dataset) * 0.005)))
+ 
+ # Tokenize data
+ def tokenize_function(examples):
+     tokenized_examples = tokenizer(examples["text"], truncation=True, max_length=512, padding="max_length", return_tensors="pt")
+     tokenized_examples["labels"] = tokenized_examples["input_ids"].clone()
+     return tokenized_examples
+ 
+ tokenized_datasets = dataset.map(tokenize_function, batched=True, remove_columns=["text"])
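+ 
+ # For causal LM fine-tuning the labels are a copy of input_ids; the model shifts
+ # them internally so the loss is standard next-token prediction.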
+ 
+ # Training arguments
+ training_args = TrainingArguments(
+     output_dir="./peft-lora-output",
+     per_device_train_batch_size=4,
+     num_train_epochs=3,
+     learning_rate=2e-4,
+     fp16=True,
+     logging_steps=10,
+     save_steps=50,
+     save_total_limit=1,
+     report_to="none"
+ )
+ 
+ # Trainer setup
+ trainer = Trainer(
+     model=model,
+     args=training_args,
+     train_dataset=tokenized_datasets
+ )
+ 
+ # Start training
+ trainer.train()
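+ 
+ # With only ~0.5% of the dataset, this run is a quick smoke test of the LoRA setup
+ # rather than a full fine-tune; use more data (and steps) for a usable chatbot.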
+ 
+ # Save LoRA adapter weights
+ model.save_pretrained("./peft-lora-output")
+ tokenizer.save_pretrained("./peft-lora-output")
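+ 
+ # save_pretrained on a PEFT model writes only the small adapter weights and config,
+ # not the full base model, which is why the base checkpoint is reloaded below.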
+ 
+ from transformers import pipeline
+ 
+ # Load base model in 4-bit again
+ base_model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     quantization_config=quantization_config,
+     device_map="auto"
+ )
+ 
+ # Load LoRA adapters into the base model
+ peft_model = PeftModel.from_pretrained(base_model, "./peft-lora-output")
+ 
+ # Load the tokenizer saved alongside the adapter
+ tokenizer = AutoTokenizer.from_pretrained("./peft-lora-output")
+ 
+ # Text generation pipeline using base model + LoRA
+ pipe = pipeline("text-generation", model=peft_model, tokenizer=tokenizer)
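+ 
+ # The adapters are kept separate from the quantized base weights and applied on top
+ # during each forward pass; nothing is merged back into the checkpoint.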
+ 
+ # Example prompt (Chinese: "What is artificial intelligence?")
+ prompt = "什么是人工智能"
+ 
+ # Generate output
+ output = pipe(prompt, max_length=100, do_sample=True, temperature=0.7)
+ 
+ # Display generated text
+ print("Prompt:\n", prompt)
+ print("\nGenerated Text:\n", output[0]['generated_text'])
+ 
+ !pip install gradio
+ 
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ from peft import PeftModel
+ import torch
+ import gradio as gr
+ 
+ # Model path (base or LoRA adapter checkpoint)
+ model_name = "Gensyn/Qwen2.5-1.5B-Instruct"
+ 
+ # 4-bit quantization config
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_compute_dtype=torch.float16,
+     bnb_4bit_use_double_quant=True,
+     bnb_4bit_quant_type="nf4"
+ )
+ 
+ # Load tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ 
+ # Load model with quantization and device mapping
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     quantization_config=bnb_config,
+     device_map="auto"
+ )
+ 
+ # If using the LoRA adapter trained above, optionally attach it:
+ # model = PeftModel.from_pretrained(model, "path/to/lora-adapter")
+ 
+ # Function to handle multi-turn chat
+ def chat(user_input, chat_history):
+     # Rebuild the full conversation context from previous (user, assistant) turns
+     full_prompt = ""
+     for user_msg, assistant_msg in chat_history:
+         full_prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
+     full_prompt += f"User: {user_input}\nAssistant:"
+ 
+     # Tokenize and generate a response
+     inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
+     outputs = model.generate(
+         **inputs,
+         max_new_tokens=200,
+         do_sample=True,
+         temperature=0.7,
+         top_p=0.9,
+         pad_token_id=tokenizer.eos_token_id
+     )
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+ 
+     # Extract just the assistant's latest reply
+     assistant_reply = response.split("Assistant:")[-1].strip()
+ 
+     # Append the new (user, assistant) pair so gr.Chatbot can render it
+     chat_history.append((user_input, assistant_reply))
+ 
+     # Clear the textbox and return the updated history for both the state and the display
+     return "", chat_history, chat_history
+ 
+ # Gradio UI
+ with gr.Blocks() as demo:
+     gr.Markdown("# 🗨️ DeepSeek / Qwen LoRA Chatbot")
+ 
+     chatbot = gr.Chatbot(label="Multilingual LLM chatbot")
+     user_input = gr.Textbox(label="Your message", lines=2, placeholder="Type a message and press Enter...")
+     send_btn = gr.Button("Send")
+ 
+     state = gr.State([])  # Store chat history as (user, assistant) pairs
+ 
+     send_btn.click(fn=chat, inputs=[user_input, state], outputs=[user_input, state, chatbot])
+     user_input.submit(fn=chat, inputs=[user_input, state], outputs=[user_input, state, chatbot])
+ 
+ # Launch app
+ demo.launch(share=True)
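+ 
+ # share=True asks Gradio to create a temporary public URL for the demo, which is
+ # convenient when running inside Colab.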