Add streamlit Q&A app with graph visualization
Jallermax committed Oct 2, 2024
1 parent 97c061a commit 0994b6a
Showing 3 changed files with 172 additions and 6 deletions.
22 changes: 16 additions & 6 deletions README.md
@@ -3,14 +3,18 @@

## 🛠 Getting Started

### Running Data Ingestion:

#### Using Python environment

1. Install Neo4j
2. Install Python
3. Create and configure `.env` in the root directory from `.env.example`
4. Adjust options in `config/config.yaml` if necessary
5. `pip install -r requirements.txt`
6. `python main.py`
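Step 3's `.env` file is a list of `KEY=VALUE` lines that `python-dotenv` (already in `requirements.txt`) loads at startup. As a rough sketch of the format only (the real library also handles quoting rules, `export` prefixes, and variable interpolation):

```python
def parse_env(text: str) -> dict[str, str]:
    """Parse simple KEY=VALUE lines, skipping blanks and # comments."""
    env = {}
    for line in text.splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        key, sep, value = line.partition("=")
        if sep:  # keep only lines that actually contain '='
            env[key.strip()] = value.strip().strip("'\"")
    return env


sample = """
# Neo4j connection
NEO4J_URI=bolt://localhost:7687
NEO4J_PASSWORD="secret"
"""
print(parse_env(sample))  # {'NEO4J_URI': 'bolt://localhost:7687', 'NEO4J_PASSWORD': 'secret'}
```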

### Alternative: Using docker-compose
#### Alternative: Using docker-compose

1. Install Docker and docker-compose
2. Create and configure `.env` in the root directory from `.env.example`
@@ -22,6 +26,12 @@
> - **Notion-API cache:** Designed for session-scope caching; using the FS cache with a long TTL will prevent updated pages from being fetched
> - **Processed pages and links cache:** Designed for rapid testing and development. Prevents syncing or removal of already processed and cached pages and links from the graph
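The TTL trade-off above can be illustrated with a minimal in-memory sketch (the project's cache is FS-backed; `TTLCache` and its injectable `clock` here are purely illustrative):

```python
import time


class TTLCache:
    """Minimal TTL cache: a long TTL keeps serving stale page content."""

    def __init__(self, ttl_seconds: float, clock=time.monotonic):
        self.ttl = ttl_seconds
        self.clock = clock
        self._store = {}  # key -> (value, stored_at)

    def get(self, key, fetch):
        """Return the cached value while fresh; otherwise fetch and cache."""
        if key in self._store:
            value, stored_at = self._store[key]
            if self.clock() - stored_at < self.ttl:
                return value  # may be stale if the page changed upstream
        value = fetch()
        self._store[key] = (value, self.clock())
        return value


now = [0.0]
cache = TTLCache(ttl_seconds=3600, clock=lambda: now[0])
cache.get("page-1", lambda: "old content")
now[0] = 1800.0  # half an hour later, the page was edited upstream
print(cache.get("page-1", lambda: "new content"))  # still prints "old content"
```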

### Running Q&A app:

1. Prerequisites: a running Neo4j instance with processed data
2. `pip install -r requirements.txt`
3. `python -m streamlit run app_st.py`

## 🌟 Project Overview

![ingestion visualization](/docs/ingestion.png)
@@ -76,9 +86,11 @@ personal knowledge base.
- [Notion API](https://developers.notion.com/reference/get-database) integration with configurable request caching: Successfully ingesting documents from Notion Knowledge Base (all pages or from specified root page). Repeated ingestion will process only updated pages.
- Basic Graph Construction: Creating graph connections based on knowledge base organizational structure and explicit page mentions.
- Semantic Search: Implemented content embeddings for advanced search capabilities.
- Basic Streamlit app for querying the graph and visualizing connections.
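The semantic-search bullet boils down to ranking pages by embedding similarity; a toy sketch, with hand-made 3-d vectors standing in for real model embeddings:

```python
import math


def cosine_similarity(a: list[float], b: list[float]) -> float:
    dot = sum(x * y for x, y in zip(a, b))
    norms = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    return dot / norms if norms else 0.0


def top_k_pages(query_vec, page_vecs, k=2):
    """Return ids of the k pages whose embeddings are closest to the query."""
    ranked = sorted(page_vecs,
                    key=lambda p: cosine_similarity(query_vec, page_vecs[p]),
                    reverse=True)
    return ranked[:k]


pages = {
    "neo4j-notes": [0.9, 0.1, 0.0],
    "cooking": [0.0, 0.2, 0.9],
    "graph-dbs": [0.8, 0.3, 0.1],
}
print(top_k_pages([1.0, 0.0, 0.0], pages))  # ['neo4j-notes', 'graph-dbs']
```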

<details>
<summary>Click to see supported Notion Links 🔗</summary>
</br>
<br>

| Type | Parse Markdown Text | Parse References | Recursive Parsing |
|------------------------------------------|:-------------------:|:----------------:|:-----------------:|
@@ -162,7 +174,6 @@ personal knowledge base.

</details>


### 🛠️ In Development
- Multi-Source Data Integration: Expanding beyond Notion to include Pocket, web pages, and more.
Make these integrations easy to plug in.
@@ -174,10 +185,9 @@ Make these integrations easy to plug in.
2. Retrieve semantically similar pages
3. Fetch close neighbors of these pages based on semantic proximity
4. Provide LLM with context from the closest pages (semantically)
5. Visualize the graph showing found pages, their semantic scores, neighbors, connections, and topic clusters
- Achieve 90%+ test coverage
5. Visualize the graph showing found pages, their semantic scores, neighbors, connections, and topic clusters</details>

</details>
- Achieve 90%+ test coverage
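The four-step retrieval flow above can be sketched against a toy in-memory graph (function and field names here are illustrative, not the project's actual API):

```python
def retrieve_context(question_vec, pages, edges, top_k=2):
    """Steps 1-4: rank pages by similarity, expand to graph neighbors,
    and assemble a context string for the LLM."""
    def sim(a, b):
        return sum(x * y for x, y in zip(a, b))  # dot product on unit vectors

    ranked = sorted(pages, key=lambda p: sim(question_vec, pages[p]["vec"]),
                    reverse=True)
    hits = ranked[:top_k]                                     # step 2
    neighbors = {n for p in hits for n in edges.get(p, [])}   # step 3
    context_pages = hits + sorted(neighbors - set(hits))
    return "\n".join(pages[p]["text"] for p in context_pages)  # step 4


pages = {
    "a": {"vec": [1.0, 0.0], "text": "Page A"},
    "b": {"vec": [0.9, 0.1], "text": "Page B"},
    "c": {"vec": [0.0, 1.0], "text": "Page C"},
}
edges = {"a": ["c"], "b": ["a"]}
print(retrieve_context([1.0, 0.0], pages, edges))  # Page A, Page B, then neighbor Page C
```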

### 🔮 Future Plans
- Streamlit chat interface with dashboard for visualizing insights and connections (InfraNodus-like).
155 changes: 155 additions & 0 deletions app_st.py
@@ -0,0 +1,155 @@
import os

import streamlit as st
import streamlit.components.v1 as components

from graph_rag.config.config_manager import default_config
from graph_rag.controller import query_controller
from graph_rag.storage import Neo4jManager

neo4j_manager = Neo4jManager()

# Initialize session state
if 'config' not in st.session_state:
    st.session_state.config = default_config

st.set_page_config(page_title="Knowledge Nexus - Graph-based Q&A", layout="wide")

# Custom CSS for styling
st.markdown("""
<style>
.thinking-pane {
    background-color: #f0f0f0;
    border-radius: 5px;
    padding: 10px;
    margin-top: 20px;
}
.stJson {
    max-height: 300px;
    overflow-y: auto;
}
.graph-pane {
    height: calc(100vh - 100px);
    overflow-y: auto;
}
.scrollable-text {
    max-height: 300px;
    overflow-y: auto;
    border: 1px solid #e0e0e0;
    padding: 10px;
    margin-bottom: 10px;
}
</style>
""", unsafe_allow_html=True)

# Sidebar for settings
st.sidebar.title("Settings")

# OpenAI API Key input
def update_openai_api_key(key: str):
    print(f"Updating OpenAI API Key to '{key[:4]}{'*' * (len(key) - 8)}{key[-4:]}'")
    st.session_state.config.OPENAI_API_KEY = key
    os.environ.update({"OPENAI_API_KEY": key})


api_key = st.sidebar.text_input("Enter OpenAI API Key",
                                value=st.session_state.config.OPENAI_API_KEY,
                                type="password")
if api_key != st.session_state.config.OPENAI_API_KEY:
    update_openai_api_key(api_key)

# LLM Model selection
def update_llm_model(model: str):
    print(f"Updating LLM Model to '{model}'")
    st.session_state.config.LLM_MODEL = model


models = ["fake_gpt", "gpt-4o-mini", "gpt-4o-2024-08-06"]  # Add or modify models as needed
selected_model = st.sidebar.selectbox("Choose LLM Model",
                                      models,
                                      index=models.index(st.session_state.config.LLM_MODEL))
if selected_model != st.session_state.config.LLM_MODEL:
    update_llm_model(selected_model)

# LLM Temperature
def update_llm_temperature(temperature: float):
    print(f"Updating LLM Temperature to {temperature}")
    st.session_state.config.LLM_TEMPERATURE = temperature


temperature = st.sidebar.slider("Choose LLM Temperature",
                                min_value=0.0,
                                max_value=1.0,
                                value=st.session_state.config.LLM_TEMPERATURE,
                                step=0.01)
if temperature != st.session_state.config.LLM_TEMPERATURE:
    update_llm_temperature(temperature)

# Retrieval method selection
retrieval_method = st.sidebar.radio(
    "Choose Retrieval Method",
    ("Deep Answer", "Simple Answer")
)

# top_k selection
top_k = st.sidebar.number_input("Top K Nodes", value=5, min_value=1, step=1)

# Langsmith Tracing
def update_langsmith_tracing(value: bool):
    print(f"Updating Langsmith Tracing to: {value}")
    os.environ.update({"LANGCHAIN_TRACING_V2": str(value)})


# Note: bool("False") is True, so compare the string value explicitly
langsmith_tracing_env = os.environ.get("LANGCHAIN_TRACING_V2", "").lower() == "true"
langsmith_tracing = st.sidebar.checkbox("Enable Langsmith Tracing",
                                        value=langsmith_tracing_env)
if langsmith_tracing_env != langsmith_tracing:
    update_langsmith_tracing(langsmith_tracing)

# Main content
st.title("Knowledge Nexus")

print("Streamlit app is running")

query = st.text_input("Enter your question:")

if query:
    print(f"Query received: {query}\nRetrieving using chosen mode: {retrieval_method}")
    # Use the selected retrieval method
    if retrieval_method == "Deep Answer":
        result = query_controller.deep_answer_on_graph(query, top_k)
    else:
        result = query_controller.answer_on_graph(query)

    # Create two columns
    col1, col2 = st.columns([3, 2])

    with col1:
        # Thinking section with toggle
        with st.expander("Thinking", expanded=False):
            st.markdown('<div class="thinking-pane">', unsafe_allow_html=True)

            # LLM context
            st.subheader("LLM answer metadata:")
            st.json(result['llm_answer'].response_metadata)

            # Intermediate steps
            st.subheader("Intermediate Steps:")
            st.markdown('<div class="scrollable-text">', unsafe_allow_html=True)
            for step in result.get('intermediate_steps', []):
                st.text(step)
            st.markdown('</div>', unsafe_allow_html=True)

            # Graph data
            st.subheader("Graph Data:")
            st.json(result["graph_data"])

            st.markdown('</div>', unsafe_allow_html=True)

        st.subheader("Answer:")
        st.write(result["llm_answer"].content)

    with col2:
        st.subheader("Graph Visualization:")
        with st.container():
            st.markdown('<div class="graph-pane">', unsafe_allow_html=True)
            graph_html = query_controller.render_graph(result["graph_data"])
            with open(graph_html, 'r') as f:
                components.html(f.read(), height=600)
            st.markdown('</div>', unsafe_allow_html=True)

else:
    st.write("Please enter a question to get started.")

print("Streamlit app script completed")
1 change: 1 addition & 0 deletions requirements.txt
@@ -11,6 +11,7 @@ pyaml-env~=1.2.1
python-dotenv~=1.0.1
PyYAML~=6.0.2
spacy~=3.7.5
streamlit~=1.38.0
pyvis~=0.3.2
python-dateutil~=2.9.0.post0
tiktoken~=0.7.0
