Question Answering
English
adityashisharma committed on
Commit
23c12e5
verified
1 Parent(s): 550ce91

Upload chatbot.py

Browse files
Files changed (1) hide show
  1. chatbot.py +158 -0
chatbot.py ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import asyncio
3
+ from fastapi import FastAPI, HTTPException, Depends
4
+ from fastapi.security import OAuth2PasswordBearer
5
+ from pydantic import BaseModel
6
+ from langchain.chains import RetrievalQA
7
+ from langchain.prompts import PromptTemplate
8
+ from langchain.vectorstores import FAISS
9
+ from langchain.embeddings import HuggingFaceEmbeddings
10
+ from langchain.memory import ConversationBufferMemory
11
+ from langchain.llms import HuggingFacePipeline
12
+ from langchain.document_loaders import TextLoader, DataFrameLoader
13
+ from langchain_community.vectorstores import FAISS
14
+ from langchain_community.embeddings import HuggingFaceEmbeddings
15
+ from langchain_community.document_loaders import TextLoader, DataFrameLoader
16
+ from langchain_community.llms import HuggingFacePipeline
17
+ from huggingface_hub import login
18
+
19
# Log in to the Hugging Face Hub only when a token is configured.
# Calling login(token=None) falls back to an interactive prompt, which would
# stall a non-interactive server process at import time.
_hf_token = os.getenv("HUGGINGFACE_HUB_TOKEN")
if _hf_token:
    login(token=_hf_token)
else:
    print("HUGGINGFACE_HUB_TOKEN is not set; skipping Hugging Face Hub login.")
21
+
22
+ import pandas as pd
23
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
24
+
25
# Step 1: Configure LangSmith Observability
# The LangSmith API key is a secret and must be supplied through the
# LANGCHAIN_API_KEY environment variable; it is never written in this file.
# NOTE(review): a key was previously committed here -- revoke and rotate it.
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "yotta-vm-chatbot"
if os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
else:
    # Tracing cannot authenticate without a key, so switch it off explicitly.
    os.environ["LANGCHAIN_TRACING_V2"] = "false"
    print("LANGCHAIN_API_KEY is not set; LangSmith tracing is disabled.")
30
+
31
+ # Step 2: Llama 2 Chat Model Setup
32
def load_llama2_chat_model():
    """Create the LangChain LLM wrapper for the Llama 2 7B chat checkpoint.

    Loads the tokenizer and the causal-LM weights (half precision, automatic
    device placement), combines them into a ``text-generation`` pipeline and
    returns that pipeline wrapped in ``HuggingFacePipeline``.
    """
    checkpoint = "meta-llama/Llama-2-7b-chat-hf"
    print("Loading Llama 2 Chat model...")

    llama_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    llama_weights = AutoModelForCausalLM.from_pretrained(
        checkpoint, device_map="auto", torch_dtype="float16"
    )
    generator = pipeline(
        "text-generation", model=llama_weights, tokenizer=llama_tokenizer
    )
    return HuggingFacePipeline(pipeline=generator)


# Loaded once at import time and shared by the retrieval QA chain.
llama_model = load_llama2_chat_model()
45
+
46
+ # Step 3: Load Excel Data
47
def fetch_excel_data(file_path):
    """Read an Excel sheet and convert its rows into LangChain documents.

    Each row becomes one document whose text is the ``Description`` cell and
    whose metadata holds the ``Title`` cell.

    Args:
        file_path: Path of the Excel workbook to read.

    Returns:
        The list of documents produced by ``DataFrameLoader.load()``.

    Raises:
        ValueError: If the sheet lacks the ``Description`` or ``Title`` column.
    """
    print("Loading data from Excel file...")
    df = pd.read_excel(file_path)

    required_columns = ["Description", "Title"]
    missing = [name for name in required_columns if name not in df.columns]
    if missing:
        raise ValueError(
            f"Excel file {file_path!r} is missing required column(s): {missing}"
        )

    # DataFrameLoader accepts no `metadata_columns` argument; it copies every
    # column other than `page_content_column` into the metadata. Narrowing the
    # frame first is how the metadata is limited to "Title".
    frame = df.loc[:, required_columns].dropna(subset=["Description"]).copy()
    # Page content must be text, and NaN metadata is not valid JSON.
    frame["Description"] = frame["Description"].astype(str)
    frame["Title"] = frame["Title"].fillna("")

    loader = DataFrameLoader(frame, page_content_column="Description")
    return loader.load()
56
+
57
+ # Step 4: Vector Store with Excel Data
58
def update_vector_store(file_path):
    """Build and return a FAISS index from the Excel file at *file_path*.

    The documents are read via ``fetch_excel_data`` and embedded with the
    ``sentence-transformers/all-MiniLM-L6-v2`` model. A new store is returned
    on every call; nothing is modified in place.
    """
    excel_docs = fetch_excel_data(file_path)
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(excel_docs, embedding_model)
66
+
67
# Path to the Excel file
excel_file_path = "certificate_details_chatbot_2.xlsx"
vector_store = update_vector_store(excel_file_path)

# Step 5: RAG Workflow with Memory
retriever = vector_store.as_retriever()
# The chain returns two outputs ("result" and "source_documents"). The memory
# must be told which input and output to record, otherwise saving the
# conversation turn fails because it cannot pick a single output key.
# NOTE(review): the prompt below has no history placeholder, so the stored
# turns are not fed back to the model, and this one buffer is shared by every
# caller of the chain.
memory = ConversationBufferMemory(input_key="query", output_key="result")

qa_chain = RetrievalQA.from_chain_type(
    llm=llama_model,
    retriever=retriever,
    memory=memory,
    return_source_documents=True,
    chain_type_kwargs={
        "prompt": PromptTemplate(
            input_variables=["context", "question"],
            template="Use the following context to answer the question:\n{context}\n\nQuestion: {question}\nAnswer:",
        )
    },
)
87
+
88
+
89
+ # Step 6: Periodic Sync with Excel
90
async def periodic_sync(interval: int = 3600):
    """Rebuild the vector store from the Excel file every *interval* seconds.

    After each successful rebuild the new store replaces the module-level
    ``vector_store`` and is installed on the retrievers, so later queries see
    the refreshed data. Failures are reported and the loop keeps running.

    Args:
        interval: Seconds to wait between rebuild attempts.
    """
    global vector_store
    loop = asyncio.get_running_loop()
    while True:
        try:
            # The rebuild reads the workbook and re-embeds every row; run it in
            # a worker thread so the event loop keeps serving requests.
            refreshed_store = await loop.run_in_executor(
                None, update_vector_store, excel_file_path
            )
            # Publish the new index; without this the rebuild has no effect.
            vector_store = refreshed_store
            retriever.vectorstore = refreshed_store
            # The chain may hold its own copy of the retriever object.
            qa_chain.retriever.vectorstore = refreshed_store
            print("Vector store updated with the latest Excel data.")
        except Exception as e:
            print(f"Error updating vector store: {str(e)}")
        await asyncio.sleep(interval)
101
+
102
+
103
# Step 8: FastAPI Deployment
app = FastAPI()


class QueryRequest(BaseModel):
    """JSON body accepted by the ``POST /query`` endpoint."""

    # Question text that is handed to the retrieval QA chain.
    query: str
108
+
109
@app.get("/")
def root():
    """Return the static welcome payload served at the service root."""
    greeting = "Welcome to the Excel-based Chatbot with RAG and Llama Integration!"
    return {"message": greeting}
112
+
113
# Created lazily inside the running event loop; serialises chain invocations so
# the shared model and conversation memory are used by one request at a time.
_qa_chain_lock = None


@app.post("/query")
async def query(request: QueryRequest):
    """Answer a question with the retrieval QA chain.

    Args:
        request: Body carrying the user's question in ``query``.

    Returns:
        A dict with the generated ``answer`` and the ``source_documents``
        (page content and metadata) that were retrieved for it.

    Raises:
        HTTPException: With status 500 when the chain call or the response
            assembly fails.
    """
    global _qa_chain_lock
    if _qa_chain_lock is None:
        _qa_chain_lock = asyncio.Lock()

    try:
        loop = asyncio.get_running_loop()
        async with _qa_chain_lock:
            # The chain call is synchronous and slow (LLM inference); running
            # it in a worker thread keeps the event loop responsive.
            response = await loop.run_in_executor(
                None, qa_chain, {"query": request.query}
            )
        return {
            "answer": response["result"],
            "source_documents": [
                {"page_content": doc.page_content, "metadata": doc.metadata}
                for doc in response["source_documents"]
            ],
        }
    except Exception as e:
        raise HTTPException(
            status_code=500, detail=f"Error processing the query: {str(e)}"
        ) from e
126
+
127
@app.post("/token")
async def token():
    """Return a bearer-token payload.

    NOTE(review): the token is a fixed placeholder handed out without checking
    any credentials, and no route shown in this file validates it. Replace it
    with real authentication before relying on it.
    """
    token_payload = {"access_token": "secure_token_123", "token_type": "bearer"}
    return token_payload
130
+
131
@app.on_event("startup")
async def start_background_tasks():
    """Start the periodic Excel-to-vector-store sync when the app starts."""
    # The event loop keeps only a weak reference to tasks. Hold a strong
    # reference on the app state so the sync loop cannot be garbage-collected
    # while it is still running.
    app.state.periodic_sync_task = asyncio.create_task(periodic_sync())
134
+
135
+ # Step 9: Interface and Deployment
136
@app.get("/interface")
def interface():
    """Return a simple HTML interface for interacting with the chatbot.

    The page is served as ``text/html`` so browsers render it. Its script
    submits the question to ``POST /query`` as JSON, which is the body format
    that endpoint accepts, and shows the answer on the page.
    """
    # Local import: only this endpoint needs the HTML response class.
    from fastapi.responses import HTMLResponse

    page = """
<html>
<head><title>Chatbot Interface</title></head>
<body>
<h1>Chat with the Bot</h1>
<form id="chat-form">
<label for="query">Enter your query:</label><br>
<input type="text" id="query" name="query"/><br><br>
<button type="submit">Submit</button>
</form>
<pre id="answer"></pre>
<script>
document.getElementById("chat-form").addEventListener("submit", async (event) => {
  event.preventDefault();
  const output = document.getElementById("answer");
  output.textContent = "Waiting for the answer...";
  try {
    const response = await fetch("/query", {
      method: "POST",
      headers: {"Content-Type": "application/json"},
      body: JSON.stringify({query: document.getElementById("query").value})
    });
    const payload = await response.json();
    output.textContent = response.ok ? payload.answer : JSON.stringify(payload.detail);
  } catch (err) {
    output.textContent = "Request failed: " + err;
  }
});
</script>
</body>
</html>
"""
    return HTMLResponse(content=page)
154
+
155
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when the file is run as a script.
    import uvicorn

    print("Starting the chatbot server...")
    # Listen on all interfaces, port 8000.
    uvicorn.run(app, port=8000, host="0.0.0.0")