diff --git a/README.md b/README.md
index 544e181..3281536 100644
--- a/README.md
+++ b/README.md
@@ -3,6 +3,10 @@
## 🛠 Getting Started
+### Running Data Ingestion:
+
+#### Using Python environment
+
1. install neo4j
2. install python
3. make and configure `.env` in the root directory from `.env.example`
@@ -10,7 +14,7 @@
5. `pip install -r requirements.txt`
6. `python main.py`
-### Alternative: Using docker-compose
+#### Alternative: Using docker-compose
1. install docker and docker-compose
2. make and configure `.env` in the root directory from `.env.example`
@@ -22,6 +26,12 @@
> - **Notion-API cache:** Designed for session scope caching, using FS cache with long TTL will prevent fetching updated pages
> - **Processed pages and links cache:** Designed for rapid test and development. Prevents sync or removal of already processed and cached pages and links from the graph
+### Running Q&A app:
+
+1. Prerequisites: running Neo4j instance with processed data
+2. `pip install -r requirements.txt`
+3. `python -m streamlit run app_st.py`
+
## 🌟 Project Overview

@@ -76,9 +86,11 @@ personal knowledge base.
- [Notion API](https://developers.notion.com/reference/get-database) integration with configurable request caching: Successfully ingesting documents from Notion Knowledge Base (all pages or from specified root page). Repeated ingestion will process only updated pages.
- Basic Graph Construction: Creating graph connections based on knowledge base organizational structure and explicit page mentions.
- Semantic Search: Implemented content embeddings for advanced search capabilities.
+- Basic Streamlit app for querying the graph and visualizing connections.
+
Click to see supported Notion Links 🔗
-
+
| Type | Parse Markdown Text | Parse References | Recursive Parsing |
|------------------------------------------|:-------------------:|:----------------:|:-----------------:|
@@ -162,7 +174,6 @@ personal knowledge base.
-
### 🛠️ In Development
- Multi-Source Data Integration: Expanding beyond Notion to include Pocket, web pages, and more.
Make these integrations easy to plug in.
@@ -174,10 +185,9 @@ Make these integrations easy to plug in.
2. Retrieve semantically similar pages
3. Fetch close neighbors of these pages based on semantic proximity
4. Provide LLM with context from the closest pages (semantically)
- 5. Visualize the graph showing found pages, their semantic scores, neighbors, connections, and topic clusters
-- Achieve 90%+ test coverage
+ 5. Visualize the graph showing found pages, their semantic scores, neighbors, connections, and topic clusters
-
+- Achieve 90%+ test coverage
### 🔮 Future Plans
- Streamlit chat interface with dashboard for visualizing insights and connections (InfraNodus-like).
diff --git a/app_st.py b/app_st.py
new file mode 100644
index 0000000..d103716
--- /dev/null
+++ b/app_st.py
@@ -0,0 +1,155 @@
+import os
+
+import streamlit as st
+import streamlit.components.v1 as components
+
+from graph_rag.config.config_manager import default_config
+from graph_rag.controller import query_controller
+from graph_rag.storage import Neo4jManager
+
+neo4j_manager = Neo4jManager()
+
+# Initialize session state
+if 'config' not in st.session_state:
+ st.session_state.config = default_config
+
+st.set_page_config(page_title="Knowledge Nexus - Graph-based Q&A", layout="wide")
+
+# Custom CSS for styling
+st.markdown("""
+
+""", unsafe_allow_html=True)
+
+# Sidebar for settings
+st.sidebar.title("Settings")
+
+# OpenAI API Key input
+def update_openai_api_key(key: str):
+ print(f"Updating OpenAI API Key to '{key[:4]}{'*' * (len(key) - 8)}{key[-4:]}'")
+ st.session_state.config.OPENAI_API_KEY = key
+ os.environ.update({"OPENAI_API_KEY": key})
+api_key = st.sidebar.text_input("Enter OpenAI API Key",
+ value=st.session_state.config.OPENAI_API_KEY,
+ type="password")
+if api_key != st.session_state.config.OPENAI_API_KEY:
+ update_openai_api_key(api_key)
+
+# LLM Model selection
+def update_llm_model(model: str):
+ print(f"Updating LLM Model to '{model}'")
+ st.session_state.config.LLM_MODEL = model
+models = ["fake_gpt", "gpt-4o-mini", "gpt-4o-2024-08-06"] # Add or modify models as needed
+selected_model = st.sidebar.selectbox("Choose LLM Model",
+ models,
+ index=models.index(st.session_state.config.LLM_MODEL))
+if selected_model != st.session_state.config.LLM_MODEL:
+ update_llm_model(selected_model)
+
+# LLM Temperature
+def update_llm_temperature(temperature: float):
+ print(f"Updating LLM Temperature to {temperature}")
+ st.session_state.config.LLM_TEMPERATURE = temperature
+temperature = st.sidebar.slider("Choose LLM Temperature",
+ min_value=0.0,
+ max_value=1.0,
+ value=st.session_state.config.LLM_TEMPERATURE,
+ step=0.01)
+if temperature != st.session_state.config.LLM_TEMPERATURE:
+ update_llm_temperature(temperature)
+
+# Retrieval method selection
+retrieval_method = st.sidebar.radio(
+ "Choose Retrieval Method",
+ ("Deep Answer", "Simple Answer")
+)
+
+# top_k selection
+top_k = st.sidebar.number_input("Top K Nodes", value=5, min_value=1, step=1)
+
+# Langsmith Tracing
+def update_langsmith_tracing(value: bool):
+ print(f"Updating Langsmith Tracing to: {value}")
+ os.environ.update({"LANGCHAIN_TRACING_V2": str(value)})
+langsmith_tracing_env = bool(os.environ.get("LANGCHAIN_TRACING_V2", False))
+langsmith_tracing = st.sidebar.checkbox("Enable Langsmith Tracing",
+ value=langsmith_tracing_env)
+if langsmith_tracing_env != langsmith_tracing:
+ update_langsmith_tracing(langsmith_tracing)
+
+# Main content
+st.title("Knowledge Nexus")
+
+print("Streamlit app is running")
+
+query = st.text_input("Enter your question:")
+
+if query:
+ print(f"Query received: {query}\nRetrieving using chosen mode: {retrieval_method}")
+ # Use the selected retrieval method
+ if retrieval_method == "Deep Answer":
+ result = query_controller.deep_answer_on_graph(query, top_k)
+ else:
+ result = query_controller.answer_on_graph(query)
+
+ # Create two columns
+ col1, col2 = st.columns([3, 2])
+
+ with col1:
+ # Thinking section with toggle
+ with st.expander("Thinking", expanded=False):
+            st.markdown('<div class="thinking-content">', unsafe_allow_html=True)
+
+ # LLM context
+ st.subheader("LLM answer metadata:")
+ st.json(result['llm_answer'].response_metadata)
+
+ # Intermediate steps
+ st.subheader("Intermediate Steps:")
+            st.markdown('<div class="intermediate-steps">', unsafe_allow_html=True)
+ for step in result.get('intermediate_steps', []):
+ st.text(step)
+            st.markdown('</div>', unsafe_allow_html=True)
+
+ # Graph data
+ st.subheader("Graph Data:")
+ st.json(result["graph_data"])
+
+        st.markdown('</div>', unsafe_allow_html=True)
+
+ st.subheader("Answer:")
+ st.write(result["llm_answer"].content)
+
+ with col2:
+ st.subheader("Graph Visualization:")
+ with st.container():
+            st.markdown('<div class="graph-container">', unsafe_allow_html=True)
+ graph_html = query_controller.render_graph(result["graph_data"])
+ components.html(open(graph_html, 'r').read(), height=600)
+            st.markdown('</div>', unsafe_allow_html=True)
+
+else:
+ st.write("Please enter a question to get started.")
+
+print("Streamlit app script completed")
diff --git a/requirements.txt b/requirements.txt
index 855c079..ed1bf1e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,6 +11,7 @@ pyaml-env~=1.2.1
python-dotenv~=1.0.1
PyYAML~=6.0.2
spacy~=3.7.5
+streamlit~=1.38.0
pyvis~=0.3.2
python-dateutil~=2.9.0.post0
tiktoken~=0.7.0