|
import hashlib
import os
import pickle
import random
from collections import defaultdict

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
|
|
|
|
|
|
|
|
|
class RLChatbot:
    """Tabular Q-learning agent that selects one canned reply per message.

    States are string IDs produced by :meth:`featurize`; actions are integer
    indices into ``actions``. The Q-table maps each state ID to a NumPy array
    holding one value per action, created as zeros on first access.

    Args:
        actions: Sequence of candidate replies; its length fixes the width of
            every Q-table row.
        alpha: Learning rate used by :meth:`update`.
        gamma: Discount factor applied to the best next-state value.
        epsilon: Probability of picking a random action in
            :meth:`choose_action`.
    """

    def __init__(self, actions, alpha=0.1, gamma=0.9, epsilon=0.2):
        self.actions = actions
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.q_table = defaultdict(self._new_row)
        # Not used by any method of this class; kept so existing code that
        # reads ``agent.vectorizer`` keeps working.
        self.vectorizer = CountVectorizer()

    def _new_row(self):
        """Return a zero-initialised Q-row sized for the current action list."""
        # Reads self.actions at call time, so rows created after load() has
        # swapped the action list still get the right width.
        return np.zeros(len(self.actions))

    def featurize(self, text):
        """Convert input text to a stable state ID (string key).

        The text is lower-cased and hashed with SHA-256, then reduced modulo
        10**8. Built-in ``hash()`` is salted per interpreter process for
        strings, so IDs derived from it would not match the keys of a Q-table
        saved by an earlier run; a cryptographic digest is process-independent.

        Args:
            text: Raw user message.

        Returns:
            Decimal string of an integer in ``[0, 10**8)``.
        """
        # "surrogatepass" keeps lone surrogates encodable, so any str that
        # built-in hash() accepted is still accepted here.
        encoded = text.lower().encode("utf-8", "surrogatepass")
        digest = hashlib.sha256(encoded).hexdigest()
        return str(int(digest, 16) % (10**8))

    def choose_action(self, state):
        """Pick an action index with an epsilon-greedy policy.

        Args:
            state: State ID as returned by :meth:`featurize`.

        Returns:
            A random index with probability ``epsilon``, otherwise the index
            of the highest Q-value for ``state`` (first one on ties).
        """
        if random.random() < self.epsilon:
            return random.randrange(len(self.actions))
        return int(np.argmax(self.q_table[state]))

    def update(self, state, action, reward, next_state):
        """Apply one Q-learning update to ``Q[state][action]``.

        Q <- Q + alpha * (reward + gamma * max(Q[next_state]) - Q)

        Args:
            state: State ID the action was taken in.
            action: Index of the action taken.
            reward: Observed reward.
            next_state: State ID reached afterwards.
        """
        row = self.q_table[state]
        old_q = row[action]
        next_max = np.max(self.q_table[next_state])
        td_target = reward + self.gamma * next_max
        row[action] = old_q + self.alpha * (td_target - old_q)

    def save(self, path="rl_chatbot.pkl"):
        """Pickle ``(q_table as a plain dict, actions)`` to ``path``."""
        # A plain dict is stored because the defaultdict's factory is bound to
        # this instance and does not belong in the file.
        with open(path, "wb") as f:
            pickle.dump((dict(self.q_table), self.actions), f)

    def load(self, path="rl_chatbot.pkl"):
        """Restore the Q-table and action list written by :meth:`save`.

        Does nothing when ``path`` does not exist.
        """
        try:
            with open(path, "rb") as f:
                # SECURITY: unpickling can execute arbitrary code. Only load
                # files this program wrote itself.
                data = pickle.load(f)
        except FileNotFoundError:
            return
        saved_table, saved_actions = data[0], data[1]
        self.actions = saved_actions
        self.q_table = defaultdict(self._new_row, saved_table)
|
|
|
|
|
|
|
|
|
def simulated_reward(user_input, bot_response):
    """Score a reply for simulated training.

    Returns 5 when one of the intent keywords ("hello", "bye", "help")
    occurs, case-insensitively, in both the user message and the bot reply;
    otherwise returns -1.
    """
    for keyword in ("hello", "bye", "help"):
        # The reply is only inspected once the keyword is found in the input.
        if keyword in user_input.lower() and keyword in bot_response.lower():
            return 5
    return -1
|
|
|
|
|
|
|
|
|
# Candidate replies. A Q-table column index is a position in this list, so
# reordering or resizing it invalidates any previously saved table.
ACTIONS = [
    # Greetings, farewells and a generic offer of help.
    "Hello! How can I help you today?",
    "Hi there! What’s on your mind?",
    "Goodbye! Have a great day.",
    "See you later! Keep coding.",
    "I’m here to help with your questions.",
    # Machine-learning topics.
    "Are you working on machine learning today?",
    "Which model architecture are you using?",
    "Do you want to discuss prompt engineering or fine-tuning?",
    "I can explain how transformers work in detail.",
    "Would you like me to write example PyTorch code for you?",
    "I can help debug your reinforcement learning agent.",
    "What dataset are you using for your project?",
    "Let’s talk about optimizing training performance.",
    "Are you running your model on CPU or GPU?",
    "I can guide you on hyperparameter tuning.",
    # Coding and tooling.
    "Would you like me to generate example code?",
    "I can help write a FastAPI endpoint for your AI model.",
    "Do you need help with Hugging Face Transformers?",
    "We can integrate this with a Flask web app.",
    "Do you want me to explain FAISS indexing?",
    "I can walk you through a RAG (Retrieval-Augmented Generation) pipeline.",
    "Let’s debug your Python code step-by-step.",
    "Would you like me to explain gradient descent?",
    # Fallbacks and clarification requests.
    "I’m not sure I understand, could you rephrase?",
    "Can you provide more details?",
    "Let’s break down the problem together.",
    "Interesting question! Let’s explore it.",
    "I can provide documentation links if you need.",
    "That’s a complex topic, but I can simplify it for you.",
]

# User messages sampled during simulated training.
TRAINING_DATA = [
    # Greetings.
    "hello", "hi there", "hey", "good morning", "good evening",
    "what's up", "how are you", "yo", "long time no see", "how's it going",
    # Requests for help.
    "i need help", "can you help me", "please help", "i have a question",
    "i'm stuck", "can you guide me", "how do i fix this", "explain this to me",
    "can you give me an example", "show me sample code",
    # Machine-learning questions.
    "how to train a model", "what is reinforcement learning",
    "how does fine tuning work", "what is transfer learning",
    "explain gradient descent", "how to improve accuracy",
    "what is overfitting", "what is prompt engineering",
    "how to load a huggingface model", "how to use pytorch",
    "how to deploy a model", "difference between supervised and unsupervised learning",
    # Debugging and tooling questions.
    "why is my code not working", "how to debug python code",
    "what does this error mean", "how to fix module not found error",
    "how to install requirements", "what is virtual environment",
    "how to use git", "how to clone a repository",
    "what is docker", "how to run flask app",
    # Farewells.
    "bye", "goodbye", "see you", "bye bye", "take care", "catch you later",
]

# Every exchange is a one-step episode. featurize() only returns digit
# strings, so this key can never collide with a real state (a user typing
# "end" used to share the terminal row and could make it non-zero).
_TERMINAL_STATE = "terminal"

_MIN_RATING = -5
_MAX_RATING = 5


def _train_simulated(agent, actions, training_data, episodes=200):
    """Run one-step episodes on random training messages scored by simulated_reward."""
    for _ in range(episodes):
        user_msg = random.choice(training_data)
        state = agent.featurize(user_msg)
        action = agent.choose_action(state)
        reward = simulated_reward(user_msg, actions[action])
        agent.update(state, action, reward, _TERMINAL_STATE)


def _read_rating():
    """Prompt for a rating; return it clamped to [-5, 5], 0 if not an integer, None on EOF/Ctrl-C."""
    try:
        raw = input("Rate this reply (-5 to 5): ")
    except (EOFError, KeyboardInterrupt):
        print()
        return None
    try:
        rating = int(raw)
    except ValueError:
        return 0
    # Enforce the advertised range so one typo cannot swamp the Q-value.
    return max(_MIN_RATING, min(_MAX_RATING, rating))


def _chat(agent, actions):
    """Interactive loop: reply, ask for a rating, learn from it and save after each turn."""
    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C ends the session like "quit".
            print()
            break
        if user_input.strip().lower() in ("quit", "exit"):
            break

        state = agent.featurize(user_input)
        action = agent.choose_action(state)
        print(f"Bot: {actions[action]}")

        reward = _read_rating()
        if reward is None:
            break
        agent.update(state, action, reward, _TERMINAL_STATE)
        agent.save()


def main():
    """Restore any saved model, run simulated training, then chat interactively."""
    agent = RLChatbot(ACTIONS)

    # Load BEFORE training and saving: the save below would otherwise
    # overwrite the feedback stored by previous sessions with a fresh table.
    agent.load()
    if list(agent.actions) != ACTIONS:
        # The saved table was built for a different reply list; its columns
        # would point at the wrong replies, so start over.
        agent = RLChatbot(ACTIONS)

    _train_simulated(agent, ACTIONS, TRAINING_DATA)
    print("✅ Training completed (simulated)")
    agent.save()

    print("\n🤖 RL Chatbot is ready! Type 'quit' to exit.")
    _chat(agent, ACTIONS)
    print("💾 Chatbot model updated and saved.")


if __name__ == "__main__":
    main()
|
|