Update Reinforcement_simple_model.py
Reinforcement_simple_model.py (new file, 192 lines added)
import random
import pickle
import os
import hashlib
import numpy as np
from collections import defaultdict
from sklearn.feature_extraction.text import CountVectorizer

# ------------------------------
# RL Chatbot Agent (Q-Learning)
# ------------------------------
class RLChatbot:
    def __init__(self, actions, alpha=0.1, gamma=0.9, epsilon=0.2):
        self.actions = actions    # possible responses
        self.alpha = alpha        # learning rate
        self.gamma = gamma        # discount factor
        self.epsilon = epsilon    # exploration rate
        # One row of Q-values per state, one column per candidate response.
        self.q_table = defaultdict(lambda: np.zeros(len(actions)))
        self.vectorizer = CountVectorizer()  # currently unused; kept as a hook for a richer featurizer

    def featurize(self, text):
        """Convert input text to a stable hashed state ID (string key).

        hashlib is used instead of the built-in hash(), which is salted per
        process and would invalidate the pickled Q-table between runs.
        """
        digest = hashlib.md5(text.lower().encode("utf-8")).hexdigest()
        return str(int(digest, 16) % (10**8))

    def choose_action(self, state):
        """Epsilon-greedy action selection."""
        if random.random() < self.epsilon:
            return random.randint(0, len(self.actions) - 1)  # explore
        return int(np.argmax(self.q_table[state]))           # exploit

    def update(self, state, action, reward, next_state):
        """Q-learning update."""
        old_q = self.q_table[state][action]
        next_max = np.max(self.q_table[next_state])
        self.q_table[state][action] += self.alpha * (reward + self.gamma * next_max - old_q)

    def save(self, path="rl_chatbot.pkl"):
        with open(path, "wb") as f:
            pickle.dump((dict(self.q_table), self.actions), f)

    def load(self, path="rl_chatbot.pkl"):
        if os.path.exists(path):
            with open(path, "rb") as f:
                data = pickle.load(f)
            self.q_table = defaultdict(lambda: np.zeros(len(self.actions)), data[0])
            self.actions = data[1]
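
# Worked example of the Q-learning update above (illustrative numbers, not part
# of the committed file): with alpha=0.1, gamma=0.9, an unseen state
# (old_q = 0.0, next_max = 0.0) and a reward of 5, the update gives
#     Q(state, action) <- 0.0 + 0.1 * (5 + 0.9 * 0.0 - 0.0) = 0.5
# Repeating the same +5 feedback moves the value toward 5 (0.5, 0.95, 1.355, ...),
# while the -1 default reward below slowly pushes unhelpful replies negative,
# so argmax increasingly prefers responses that earned good feedback.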

# ------------------------------
# Simulated training environment
# ------------------------------
def simulated_reward(user_input, bot_response):
    """Fake reward function for simulation:
    Higher reward if bot_response 'matches' intent."""
    if "hello" in user_input.lower() and "hello" in bot_response.lower():
        return 5
    if "bye" in user_input.lower() and "bye" in bot_response.lower():
        return 5
    if "help" in user_input.lower() and "help" in bot_response.lower():
        return 5
    return -1  # default negative reward
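
# Example behaviour (follows directly from the rules above, using replies from
# the action list defined in the main block):
#     simulated_reward("hello", "Hello! How can I help you today?")   -> 5
#     simulated_reward("i need help", "I can help debug your reinforcement learning agent.") -> 5
#     simulated_reward("what is docker", "Goodbye! Have a great day.") -> -1
# Only greeting / farewell / help keyword overlap is rewarded; everything else
# receives the small negative default.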

# ------------------------------
# Main program
# ------------------------------
if __name__ == "__main__":
    # actions = [
    #     "Hello! How can I help you?",
    #     "Goodbye! Have a nice day.",
    #     "I can help with your problems. What do you need?",
    #     "I'm not sure I understand.",
    #     "Please tell me more."
    # ]

    actions = [
        # General greetings & casual
        "Hello! How can I help you today?",
        "Hi there! What’s on your mind?",
        "Goodbye! Have a great day.",
        "See you later! Keep coding.",
        "I’m here to help with your questions.",

        # AI/ML related
        "Are you working on machine learning today?",
        "Which model architecture are you using?",
        "Do you want to discuss prompt engineering or fine-tuning?",
        "I can explain how transformers work in detail.",
        "Would you like me to write example PyTorch code for you?",
        "I can help debug your reinforcement learning agent.",
        "What dataset are you using for your project?",
        "Let’s talk about optimizing training performance.",
        "Are you running your model on CPU or GPU?",
        "I can guide you on hyperparameter tuning.",

        # Developer workflow
        "Would you like me to generate example code?",
        "I can help write a FastAPI endpoint for your AI model.",
        "Do you need help with Hugging Face Transformers?",
        "We can integrate this with a Flask web app.",
        "Do you want me to explain FAISS indexing?",
        "I can walk you through a RAG (Retrieval-Augmented Generation) pipeline.",
        "Let’s debug your Python code step-by-step.",
        "Would you like me to explain gradient descent?",

        # More conversational fallback
        "I’m not sure I understand, could you rephrase?",
        "Can you provide more details?",
        "Let’s break down the problem together.",
        "Interesting question! Let’s explore it.",
        "I can provide documentation links if you need.",
        "That’s a complex topic, but I can simplify it for you."
    ]

    agent = RLChatbot(actions)

    # ------------------------------
    # Simulated training phase
    # ------------------------------
    # training_data = [
    #     "hello", "hi there", "bye", "goodbye", "i need help", "can you help me",
    #     "what's up", "please help", "bye bye", "see you"
    # ]
    training_data = [
        # Greetings / casual
        "hello", "hi there", "hey", "good morning", "good evening",
        "what's up", "how are you", "yo", "long time no see", "how's it going",

        # General help
        "i need help", "can you help me", "please help", "i have a question",
        "i'm stuck", "can you guide me", "how do i fix this", "explain this to me",
        "can you give me an example", "show me sample code",

        # AI / ML specific
        "how to train a model", "what is reinforcement learning",
        "how does fine tuning work", "what is transfer learning",
        "explain gradient descent", "how to improve accuracy",
        "what is overfitting", "what is prompt engineering",
        "how to load a huggingface model", "how to use pytorch",
        "how to deploy a model", "difference between supervised and unsupervised learning",

        # Coding / debugging
        "why is my code not working", "how to debug python code",
        "what does this error mean", "how to fix module not found error",
        "how to install requirements", "what is virtual environment",
        "how to use git", "how to clone a repository",
        "what is docker", "how to run flask app",

        # Farewells
        "bye", "goodbye", "see you", "bye bye", "take care", "catch you later"
    ]

    for episode in range(200):
        user_msg = random.choice(training_data)
        state = agent.featurize(user_msg)
        action = agent.choose_action(state)
        bot_reply = actions[action]
        reward = simulated_reward(user_msg, bot_reply)
        next_state = agent.featurize("end")  # stateless
        agent.update(state, action, reward, next_state)

    print("✅ Training completed (simulated)")

    # Save trained model
    agent.save()

    # ------------------------------
    # Interactive chat
    # ------------------------------
    print("\n🤖 RL Chatbot is ready! Type 'quit' to exit.")
    agent.load()

    while True:
        user_input = input("You: ")
        if user_input.lower() in ["quit", "exit"]:
            break

        state = agent.featurize(user_input)
        action = agent.choose_action(state)
        bot_reply = actions[action]
        print(f"Bot: {bot_reply}")

        # Get human feedback (reward)
        try:
            reward = int(input("Rate this reply (-5 to 5): "))
        except ValueError:
            reward = 0  # default if invalid
        next_state = agent.featurize("end")
        agent.update(state, action, reward, next_state)
        agent.save()

    print("💾 Chatbot model updated and saved.")
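
Run as a script (python Reinforcement_simple_model.py), the file performs the 200-episode simulated phase, writes rl_chatbot.pkl, and then enters the interactive loop, where every rating you type triggers another Q-update and re-saves the table. One limitation of the hashed featurize() is that every distinct sentence becomes its own state, so feedback on "hi there" never transfers to "hi". The sketch below shows one way to coarsen the state space with keyword buckets; keyword_state and its word lists are illustrative additions, not part of the committed file, and would need tuning against the real action set.

import re

def keyword_state(text):
    """Map free text to one of a few coarse intent buckets (sketch only)."""
    words = set(re.findall(r"[a-z']+", text.lower()))
    if words & {"hello", "hi", "hey", "morning", "evening"}:
        return "greet"
    if words & {"bye", "goodbye", "later", "care"}:
        return "farewell"
    if words & {"help", "stuck", "error", "debug", "fix"}:
        return "help"
    return "other"

# Possible drop-in use: call keyword_state(user_input) wherever the script
# currently calls agent.featurize(user_input), so that similar messages
# share a single row of Q-values and human feedback generalizes.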