diff --git a/chatbot/LLM_Model/db/ad0a48fd-defc-4ead-a707-908495cd338f/data_level0.bin b/chatbot/LLM_Model/db/ad0a48fd-defc-4ead-a707-908495cd338f/data_level0.bin new file mode 100644 index 0000000..9f307c8 Binary files /dev/null and b/chatbot/LLM_Model/db/ad0a48fd-defc-4ead-a707-908495cd338f/data_level0.bin differ diff --git a/chatbot/LLM_Model/db/ad0a48fd-defc-4ead-a707-908495cd338f/header.bin b/chatbot/LLM_Model/db/ad0a48fd-defc-4ead-a707-908495cd338f/header.bin new file mode 100644 index 0000000..074f5b8 Binary files /dev/null and b/chatbot/LLM_Model/db/ad0a48fd-defc-4ead-a707-908495cd338f/header.bin differ diff --git a/chatbot/LLM_Model/db/ad0a48fd-defc-4ead-a707-908495cd338f/length.bin b/chatbot/LLM_Model/db/ad0a48fd-defc-4ead-a707-908495cd338f/length.bin new file mode 100644 index 0000000..efdb816 Binary files /dev/null and b/chatbot/LLM_Model/db/ad0a48fd-defc-4ead-a707-908495cd338f/length.bin differ diff --git a/chatbot/LLM_Model/db/ad0a48fd-defc-4ead-a707-908495cd338f/link_lists.bin b/chatbot/LLM_Model/db/ad0a48fd-defc-4ead-a707-908495cd338f/link_lists.bin new file mode 100644 index 0000000..e69de29 diff --git a/chatbot/LLM_Model/db/chroma.sqlite3 b/chatbot/LLM_Model/db/chroma.sqlite3 new file mode 100644 index 0000000..aef1ab4 Binary files /dev/null and b/chatbot/LLM_Model/db/chroma.sqlite3 differ diff --git a/chatbot/LLM_Model/pdfs/machinery_maintenance_report.pdf b/chatbot/LLM_Model/pdfs/machinery_maintenance_report.pdf new file mode 100644 index 0000000..ca2972e Binary files /dev/null and b/chatbot/LLM_Model/pdfs/machinery_maintenance_report.pdf differ diff --git a/chatbot/LLM_Model/test.ipynb b/chatbot/LLM_Model/test.ipynb new file mode 100644 index 0000000..0aff26c --- /dev/null +++ b/chatbot/LLM_Model/test.ipynb @@ -0,0 +1,402 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.document_loaders import PyPDFDirectoryLoader\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain_google_genai import GoogleGenerativeAIEmbeddings\n", + "from langchain_google_genai import ChatGoogleGenerativeAI\n", + "\n", + "from langchain.vectorstores import Chroma" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "metadata": {}, + "outputs": [], + "source": [ + "pdf_loader=PyPDFDirectoryLoader(\"pdfs\")" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "metadata": {}, + "outputs": [], + "source": [ + "data=pdf_loader.load()" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Machinery Maintenance Report\\nJCB\\nHours of Operation: 50 - Lubricated all grease points, inspected and cleaned air filter, checked battery terminals - All in good condition.\\nHours of Operation: 250 - Changed engine oil and filter, replaced fuel filter, cleaned radiator and cooler - No issues found.\\nHours of Operation: 500 - Changed hydraulic oil filter, inspected hydraulic system - Minor wear on hoses.\\nBulldozer\\nHours of Operation: 100 - Checked and adjusted track tension, lubricated all points - Track tension needed adjustment.\\nHours of Operation: 300 - Replaced engine oil and filter, cleaned air filter - Air filter had significant dust.\\nHours of Operation: 600 - Checked hydraulic system, inspected undercarriage - No significant wear observed.\\nExcavator\\nHours of Operation: 200 - Lubricated all grease points, checked hydraulic oil level - All in good condition.\\nHours of Operation: 500 - Changed engine oil and filter, inspected hydraulic system - Hydraulic oil level low, topped up.\\nHours of Operation: 1000 - Replaced hydraulic oil and filter, inspected tracks - Minor wear on tracks.\\nRoad Roller\\nHours of Operation: 150 - Checked vibratory system, lubricated all points - Vibratory system working well.\\nHours of Operation: 400 - Replaced engine oil and filter, cleaned air filter - Air filter had moderate dust.\\nHours of Operation: 800 - Checked hydraulic system, inspected drum - No significant wear observed.\\nWaste Collection Car\\nHours of Operation: 300 - Checked and adjusted brakes, lubricated all points - Brakes needed minor adjustment.\\nHours of Operation: 700 - Replaced engine oil and filter, inspected hydraulic system - Hydraulic system in good condition.\\nPage 1'" + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[0].page_content.strip()" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "metadata": {}, + "outputs": [], + "source": [ + "text_splitter = RecursiveCharacterTextSplitter(\n", + " chunk_size=400,\n", + " chunk_overlap=100\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [], + "source": [ + "text_chunks=text_splitter.split_documents(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Document(metadata={'source': 'pdfs/machinery_maintenance_report.pdf', 'page': 0}, page_content='Machinery Maintenance Report\\nJCB\\nHours of Operation: 50 - Lubricated all grease points, inspected and cleaned air filter, checked battery terminals - All in good condition.\\nHours of Operation: 250 - Changed engine oil and filter, replaced fuel filter, cleaned radiator and cooler - No issues found.')" + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text_chunks[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from dotenv import load_dotenv\n", + "\n", + "load_dotenv()\n", + "google_gemini_api=os.getenv(\"GOOGLE_API_KEY\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Creating DB" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "metadata": {}, + "outputs": [], + "source": [ + "persist_directory=\"db\"" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "I0000 00:00:1722068459.907874 679189 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported\n" + ] + } + ], + "source": [ + "embedding = GoogleGenerativeAIEmbeddings(model=\"models/embedding-001\")" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "metadata": {}, + "outputs": [], + "source": [ + "vectordb=Chroma.from_documents(\n", + " documents=text_chunks,\n", + " embedding=embedding,\n", + " persist_directory=persist_directory\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "metadata": {}, + "outputs": [], + "source": [ + "vectordb.persist()" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [], + "source": [ + "vectordb=None" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [], + "source": [ + "vectordb=Chroma(persist_directory=persist_directory,embedding_function=embedding)" + ] + }, + { + "cell_type": "code", + "execution_count": 116, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vectordb" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### getting the DB" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "metadata": {}, + "outputs": [], + "source": [ + "retriver=vectordb.as_retriever()" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "metadata": {}, + "outputs": [], + "source": [ + "docs=retriver.get_relevant_documents(\"Who Is anurag singh and where from he?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[Document(metadata={'page': 0, 'source': 'pdfs/machinery_maintenance_report.pdf'}, page_content='Machinery Maintenance Report\\nJCB\\nHours of Operation: 50 - Lubricated all grease points, inspected and cleaned air filter, checked battery terminals - All in good condition.\\nHours of Operation: 250 - Changed engine oil and filter, replaced fuel filter, cleaned radiator and cooler - No issues found.'),\n", + " Document(metadata={'page': 0, 'source': 'pdfs/machinery_maintenance_report.pdf'}, page_content='Machinery Maintenance Report\\nJCB\\nHours of Operation: 50 - Lubricated all grease points, inspected and cleaned air filter, checked battery terminals - All in good condition.\\nHours of Operation: 250 - Changed engine oil and filter, replaced fuel filter, cleaned radiator and cooler - No issues found.'),\n", + " Document(metadata={'page': 0, 'source': 'pdfs/machinery_maintenance_report.pdf'}, page_content='Machinery Maintenance Report\\nJCB\\nHours of Operation: 50 - Lubricated all grease points, inspected and cleaned air filter, checked battery terminals - All in good condition.\\nHours of Operation: 250 - Changed engine oil and filter, replaced fuel filter, cleaned radiator and cooler - No issues found.'),\n", + " Document(metadata={'page': 0, 'source': 'pdfs/machinery_maintenance_report.pdf'}, page_content='Hours of Operation: 600 - Checked hydraulic system, inspected undercarriage - No significant wear observed.\\nExcavator\\nHours of Operation: 200 - Lubricated all grease points, checked hydraulic oil level - All in good condition.\\nHours of Operation: 500 - Changed engine oil and filter, inspected hydraulic system - Hydraulic oil level low, topped up.')]" + ] + }, + "execution_count": 119, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "docs" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from dotenv import load_dotenv\n", + "\n", + "load_dotenv()\n", + "google_gemini_api=os.getenv(\"GOOGLE_API_KEY\")" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "I0000 00:00:1722068462.676466 679189 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported\n", + "I0000 00:00:1722068462.677410 679189 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported\n" + ] + } + ], + "source": [ + "llm_model=ChatGoogleGenerativeAI(model=\"gemini-1.5-pro\",google_api_key=google_gemini_api)" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "metadata": {}, + "outputs": [], + "source": [ + "system_prompt = (\n", + " \"You have an expertise on Muncipal corporation and you are well aware about the Muncipal Corporation Indore and You have all the information regarding the Indore\"\n", + " \"You also have some additional data from the dataset of the IMC indore\"\n", + " \"Provide the answer consisely\"\n", + " \"Provide the answer Under 150 words\"\n", + " \"Context: {context}\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "metadata": {}, + "outputs": [], + "source": [ + "from langchain.chains import create_retrieval_chain\n", + "from langchain.chains.combine_documents import create_stuff_documents_chain\n", + "from langchain_core.prompts import ChatPromptTemplate\n", + "prompt = ChatPromptTemplate.from_messages(\n", + " [\n", + " (\"system\", system_prompt),\n", + " (\"human\", \"{input}\"),\n", + " ]\n", + ")\n", + "question_answer_chain = create_stuff_documents_chain(llm_model, prompt)\n", + "chain = create_retrieval_chain(retriver, question_answer_chain)" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "RunnableBinding(bound=RunnableAssign(mapper={\n", + " context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])\n", + " | VectorStoreRetriever(tags=['Chroma', 'GoogleGenerativeAIEmbeddings'], vectorstore=), config={'run_name': 'retrieve_documents'})\n", + "})\n", + "| RunnableAssign(mapper={\n", + " answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={\n", + " context: RunnableLambda(format_docs)\n", + " }), config={'run_name': 'format_inputs'})\n", + " | ChatPromptTemplate(input_variables=['context', 'input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template='You have an expertise on Muncipal corporation and you are well aware about the Muncipal Corporation Indore and You have all the information regarding the IndoreYou also have some additional data from the dataset of the IMC indoreProvide the answer consiselyProvide the answer Under 150 wordsContext: {context}')), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}'))])\n", + " | ChatGoogleGenerativeAI(model='models/gemini-1.5-pro', google_api_key=SecretStr('**********'), client=, async_client=, default_metadata=())\n", + " | StrOutputParser(), config={'run_name': 'stuff_documents_chain'})\n", + " }), config={'run_name': 'retrieval_chain'})" + ] + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "metadata": {}, + "outputs": [], + "source": [ + "query=\"i got a pithole on indore near highway what i can do ?\"" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'input': 'i got a pithole on indore near highway what i can do ?',\n", + " 'context': [Document(metadata={'page': 1, 'source': 'pdfs/machinery_maintenance_report.pdf'}, page_content='Machinery Maintenance Report\\nHours of Operation: 1200 - Checked waste compactor, inspected tires - Tires needed rotation.\\nPage 2'),\n", + " Document(metadata={'page': 1, 'source': 'pdfs/machinery_maintenance_report.pdf'}, page_content='Machinery Maintenance Report\\nHours of Operation: 1200 - Checked waste compactor, inspected tires - Tires needed rotation.\\nPage 2'),\n", + " Document(metadata={'page': 1, 'source': 'pdfs/machinery_maintenance_report.pdf'}, page_content='Machinery Maintenance Report\\nHours of Operation: 1200 - Checked waste compactor, inspected tires - Tires needed rotation.\\nPage 2'),\n", + " Document(metadata={'page': 0, 'source': 'pdfs/machinery_maintenance_report.pdf'}, page_content='Hours of Operation: 1000 - Replaced hydraulic oil and filter, inspected tracks - Minor wear on tracks.\\nRoad Roller\\nHours of Operation: 150 - Checked vibratory system, lubricated all points - Vibratory system working well.\\nHours of Operation: 400 - Replaced engine oil and filter, cleaned air filter - Air filter had moderate dust.')],\n", + " 'answer': 'I understand you\\'re concerned about a pothole in Indore near a highway. Since you\\'re talking about a pothole significant enough to be called a \"pithole,\" it sounds like a safety hazard. \\n\\nHere\\'s what you can do:\\n\\n1. **Report it to Indore Municipal Corporation (IMC):** You can use their website, mobile app (if available), or call their helpline. Be prepared with the location details (landmarks, nearest building, etc.)\\n2. **Social Media:** Share a picture on social media platforms like Twitter, tagging IMC\\'s official handle. This can help spread awareness and prompt faster action. \\n\\nRemember, providing accurate information and clear visuals will help the authorities address the issue quickly. \\n'}" + ] + }, + "execution_count": 126, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chain.invoke({\"input\": query})" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "analyzerenv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/chatbot/LLM_Model/test.py b/chatbot/LLM_Model/test.py new file mode 100644 index 0000000..3bca3d1 --- /dev/null +++ b/chatbot/LLM_Model/test.py @@ -0,0 +1,88 @@ +from langchain_community.document_loaders import PyPDFDirectoryLoader +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_google_genai import GoogleGenerativeAIEmbeddings +from langchain_google_genai import ChatGoogleGenerativeAI + +from langchain.vectorstores import Chroma + + +import os +from dotenv import load_dotenv + +load_dotenv() +google_gemini_api=os.getenv("GOOGLE_API_KEY") + + + +pdf_loader=PyPDFDirectoryLoader("pdfs") +data=pdf_loader.load() +text_splitter = RecursiveCharacterTextSplitter( + chunk_size=400, + chunk_overlap=100 +) +text_chunks=text_splitter.split_documents(data) + + +persist_directory="db" +embedding = GoogleGenerativeAIEmbeddings(model="models/embedding-001") +vectordb=Chroma.from_documents( + documents=text_chunks, + embedding=embedding, + persist_directory=persist_directory +) +vectordb.persist() +vectordb=None +vectordb=Chroma(persist_directory=persist_directory,embedding_function=embedding) + + +retriver=vectordb.as_retriever() + +llm_model=ChatGoogleGenerativeAI(model="gemini-1.5-pro",google_api_key=google_gemini_api) + +system_prompt = ( + "You have an expertise on Muncipal corporation and you are well aware about the Muncipal Corporation Indore and You have all the information regarding the Indore" + "You also have some additional data from the dataset of the IMC indore" + "Provide the answer consisely" + "Provide the answer Under 150 words" + "Context: {context}" +) + +from langchain.chains import create_retrieval_chain +from langchain.chains.combine_documents import create_stuff_documents_chain +from langchain_core.prompts import ChatPromptTemplate +prompt = ChatPromptTemplate.from_messages( + [ + ("system", system_prompt), + ("human", "{input}"), + ] +) +question_answer_chain = create_stuff_documents_chain(llm_model, prompt) +chain = create_retrieval_chain(retriver, question_answer_chain) + +# Basic memory class +class Memory: + def __init__(self): + self.history = [] + + def add(self, entry): + self.history.append(entry) + if len(self.history) > 10: # Limit the history size + self.history.pop(0) + + def get_context(self): + return " ".join(self.history) + +memory = Memory() + +def get_response(query): + try: + context = memory.get_context() + response = chain.invoke({"input": query, "context": context}) + answer = response.get('answer', '') + memory.add(f"User: {query}\nAI: {answer}") + return answer + except Exception as e: + return "Sorry, I couldn't process your request." + +response=get_response("i found pithole near my home indore") +print(response) \ No newline at end of file