Azure · rdheekonda · Mar 19, 2024 · Mar 13, 2024 · Mar 14, 2024 · Mar 14, 2024
diff --git a/doc/code/memory.ipynb b/doc/code/memory.ipynb
@@ -8,6 +8,38 @@
     "The `pyrit.memory` module provides functionality to keep track of the conversation history. In a nutshell, this can be used as follows"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "f02713ce",
+   "metadata": {},
+   "source": [
+    "The PyRIT DuckDB database comprises two primary tables: `ConversationStore` and `EmbeddingStore`.\n",
+    "\n",
+    "### **ConversationStore** Table\n",
+    "The `ConversationStore` table is designed to efficiently store and manage conversational data, with each field tailored to capture specific aspects of the conversation with the LLM:\n",
+    "\n",
+    "| Field            | Type          | Description                                                                                   |\n",
+    "|------------------|---------------|-----------------------------------------------------------------------------------------------|\n",
+    "| uuid             | UUID          | A unique identifier for each conversation entry, serving as the primary key.                  |\n",
+    "| role             | String        | Indicates the origin of the message within the conversation (e.g., \"user\", \"assistant\", \"system\"). |\n",
+    "| content          | String        | The actual text content of the conversation entry.                                            |\n",
+    "| conversation_id  | String        | Groups related conversation entries. Linked to a specific LLM model, it aggregates all related conversations under a single identifier. In multi-turn interactions involving two models, there will be two distinct conversation_ids. |\n",
+    "| timestamp        | DateTime      | The creation or log timestamp of the conversation entry, defaulting to the current UTC time.  |\n",
+    "| normalizer_id    | String        | Groups messages within a prompt_normalizer, aiding in organizing conversation flows.         |\n",
+    "| sha256           | String        | An optional SHA-256 hash of the content for integrity verification.                           |\n",
+    "| labels           | ARRAY(String) | An array of labels for categorizing or filtering conversation entries.                        |\n",
+    "| idx_conversation_id | Index       | An index on the `conversation_id` column to enhance query performance, particularly for retrieving conversation histories based on conversation_id. |\n",
+    "\n",
+    "### **EmbeddingStore** Table\n",
+    "The EmbeddingStore table focuses on storing embeddings associated with the conversational data. Its structure includes:\n",
+    "\n",
+    "| Field          | Type          | Description                                                                                   |\n",
+    "|----------------|---------------|-----------------------------------------------------------------------------------------------|\n",
+    "| uuid           | UUID          | The primary key, which is a foreign key referencing the UUID in the ConversationStore table. |\n",
+    "| embedding      | ARRAY(Float)           | An array of floats representing the embedding vector.       |\n",
+    "| embedding_type | String        | The name or type of the embedding, indicating the model or method used. |\n"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
@@ -37,12 +69,12 @@
    "source": [
     "\n",
     "from uuid import uuid4\n",
-    "from pyrit.memory import FileMemory\n",
+    "from pyrit.memory import DuckDBMemory\n",
     "from pyrit.models import ChatMessage\n",
     "\n",
     "conversation_id = str(uuid4())\n",
     "\n",
-    "memory = FileMemory()\n",
+    "memory = DuckDBMemory()\n",
     "message_list = [\n",
     "    ChatMessage(role=\"user\", content=\"Hi, chat bot! This is my initial prompt.\"),\n",
     "    ChatMessage(role=\"assistant\", content=\"Nice to meet you! This is my response.\"),\n",
@@ -59,10 +91,115 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 2,
    "id": "f7e3bbd2",
    "metadata": {},
    "outputs": [],
+   "source": [
+    "memory = DuckDBMemory()\n",
+    "message_list = [\n",
+    "    ChatMessage(role=\"user\", content=\"Hi, chat bot! This is my initial prompt.\"),\n",
+    "    ChatMessage(role=\"assistant\", content=\"Nice to meet you! This is my response.\"),\n",
+    "]\n",
+    "next_message = ChatMessage(role=\"user\", content=\"Wonderful! This is my second prompt to the chat bot.\")\n",
+    "message_list.append(next_message)\n",
+    "memory.add_chat_messages_to_memory(conversations=message_list, conversation_id=conversation_id)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "2ae35d19",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[ChatMessage(role='user', content='Hi, chat bot! This is my initial prompt.', name=None, tool_calls=None, tool_call_id=None),\n",
+       " ChatMessage(role='assistant', content='Nice to meet you! This is my response.', name=None, tool_calls=None, tool_call_id=None),\n",
+       " ChatMessage(role='user', content='Wonderful! This is my second prompt to the chat bot.', name=None, tool_calls=None, tool_call_id=None),\n",
+       " ChatMessage(role='user', content='Hi, chat bot! This is my initial prompt.', name=None, tool_calls=None, tool_call_id=None),\n",
+       " ChatMessage(role='assistant', content='Nice to meet you! This is my response.', name=None, tool_calls=None, tool_call_id=None),\n",
+       " ChatMessage(role='user', content='Wonderful! This is my second prompt to the chat bot.', name=None, tool_calls=None, tool_call_id=None)]"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# To retrieve the items from memory\n",
+    "memory.get_chat_messages_with_conversation_id(conversation_id=conversation_id)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "4b3f1ad8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "True"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Update the content of every entry stored under this conversation_id\n",
+    "update_fields = {\"content\": \"this is updated field\"}\n",
+    "memory.update_entries_by_conversation_id(conversation_id=conversation_id, update_fields=update_fields)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "91667bf8",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[ChatMessage(role='user', content='this is updated field', name=None, tool_calls=None, tool_call_id=None),\n",
+       " ChatMessage(role='assistant', content='this is updated field', name=None, tool_calls=None, tool_call_id=None),\n",
+       " ChatMessage(role='user', content='this is updated field', name=None, tool_calls=None, tool_call_id=None),\n",
+       " ChatMessage(role='user', content='this is updated field', name=None, tool_calls=None, tool_call_id=None),\n",
+       " ChatMessage(role='assistant', content='this is updated field', name=None, tool_calls=None, tool_call_id=None),\n",
+       " ChatMessage(role='user', content='this is updated field', name=None, tool_calls=None, tool_call_id=None)]"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# To retrieve the items from memory\n",
+    "memory.get_chat_messages_with_conversation_id(conversation_id=conversation_id)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "c1437cb7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cleanup memory resources\n",
+    "memory.dispose_engine()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ab559581",
+   "metadata": {},
+   "outputs": [],
    "source": []
   }
  ],
@@ -71,9 +208,9 @@
    "cell_metadata_filter": "-all"
   },
   "kernelspec": {
-   "display_name": "pyrit_kernel",
+   "display_name": "pyrit-dev",
    "language": "python",
-   "name": "pyrit_kernel"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {

diff --git a/doc/code/memory.py b/doc/code/memory.py
@@ -1,15 +1,43 @@
 # %% [markdown]
 # The `pyrit.memory` module provides functionality to keep track of the conversation history. In a nutshell, this can be used as follows
 
+# %% [markdown]
+# The PyRIT DuckDB database comprises two primary tables: `ConversationStore` and `EmbeddingStore`.
+#
+# ### **ConversationStore** Table
+# The `ConversationStore` table is designed to efficiently store and manage conversational data, with each field tailored to capture specific aspects of the conversation with the LLM:
+#
+# | Field            | Type          | Description                                                                                   |
+# |------------------|---------------|-----------------------------------------------------------------------------------------------|
+# | uuid             | UUID          | A unique identifier for each conversation entry, serving as the primary key.                  |
+# | role             | String        | Indicates the origin of the message within the conversation (e.g., "user", "assistant", "system"). |
+# | content          | String        | The actual text content of the conversation entry.                                            |
+# | conversation_id  | String        | Groups related conversation entries. Linked to a specific LLM model, it aggregates all related conversations under a single identifier. In multi-turn interactions involving two models, there will be two distinct conversation_ids. |
+# | timestamp        | DateTime      | The creation or log timestamp of the conversation entry, defaulting to the current UTC time.  |
+# | normalizer_id    | String        | Groups messages within a prompt_normalizer, aiding in organizing conversation flows.         |
+# | sha256           | String        | An optional SHA-256 hash of the content for integrity verification.                           |
+# | labels           | ARRAY(String) | An array of labels for categorizing or filtering conversation entries.                        |
+# | idx_conversation_id | Index       | An index on the `conversation_id` column to enhance query performance, particularly for retrieving conversation histories based on conversation_id. |
+#
+# ### **EmbeddingStore** Table
+# The EmbeddingStore table focuses on storing embeddings associated with the conversational data. Its structure includes:
+#
+# | Field          | Type          | Description                                                                                   |
+# |----------------|---------------|-----------------------------------------------------------------------------------------------|
+# | uuid           | UUID          | The primary key, which is a foreign key referencing the UUID in the ConversationStore table. |
+# | embedding      | ARRAY(Float)           | An array of floats representing the embedding vector.       |
+# | embedding_type | String        | The name or type of the embedding, indicating the model or method used. |
+#
+
 # %%
 
 from uuid import uuid4
-from pyrit.memory import FileMemory
+from pyrit.memory import DuckDBMemory
 from pyrit.models import ChatMessage
 
 conversation_id = str(uuid4())
 
-memory = FileMemory()
+memory = DuckDBMemory()
 message_list = [
     ChatMessage(role="user", content="Hi, chat bot! This is my initial prompt."),
     ChatMessage(role="assistant", content="Nice to meet you! This is my response."),
@@ -24,3 +52,32 @@
 memory.get_chat_messages_with_conversation_id(conversation_id=conversation_id)
 
 # %%
+memory = DuckDBMemory()  # NOTE(review): re-adds messages already stored under conversation_id; confirm intended
+message_list = [
+    ChatMessage(role="user", content="Hi, chat bot! This is my initial prompt."),
+    ChatMessage(role="assistant", content="Nice to meet you! This is my response."),
+]
+next_message = ChatMessage(role="user", content="Wonderful! This is my second prompt to the chat bot.")
+message_list.append(next_message)
+memory.add_chat_messages_to_memory(conversations=message_list, conversation_id=conversation_id)
+
+
+# %%
+# Retrieve every chat message stored under this conversation_id
+memory.get_chat_messages_with_conversation_id(conversation_id=conversation_id)
+
+# %%
+# Update the content of every entry stored under this conversation_id
+update_fields = {"content": "this is updated field"}
+memory.update_entries_by_conversation_id(conversation_id=conversation_id, update_fields=update_fields)
+
+
+# %%
+# Retrieve the messages again to confirm that the content update was applied
+memory.get_chat_messages_with_conversation_id(conversation_id=conversation_id)
+
+# %%
+# Clean up memory resources
+memory.dispose_engine()
+
+# %%
diff --git a/doc/code/memory_export_to_json.ipynb b/doc/code/memory_export_to_json.ipynb
@@ -0,0 +1,106 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `pyrit.memory` module provides functionality to dump the database tables into JSON files, creating one file per table. It can also dump the data for a given conversation ID. In a nutshell, this can be used as follows:"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Export all DuckDB tables to JSON format files\n",
+    "\n",
+    "This will export all DuckDB tables to JSON files, which will be stored in the `pyrit/results` folder."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pyrit.memory import DuckDBMemory\n",
+    "from uuid import uuid4\n",
+    "from pyrit.models import ChatMessage\n",
+    "\n",
+    "duckdb_memory = DuckDBMemory()\n",
+    "duckdb_memory.export_all_tables()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Export Conversation Data to JSON for a Conversation ID\n",
+    "This functionality exports all conversation data associated with a specific conversation ID to a JSON file. The file, named using the format `conversation_id.json`, will be located in the `pyrit/results` folder."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "conversation_id = str(uuid4())\n",
+    "\n",
+    "message_list = [\n",
+    "    ChatMessage(role=\"user\", content=\"Hi, chat bot! This is my initial prompt.\"),\n",
+    "    ChatMessage(role=\"assistant\", content=\"Nice to meet you! This is my response.\"),\n",
+    "]\n",
+    "next_message = ChatMessage(role=\"user\", content=\"Wonderful! This is my second prompt to the chat bot.\")\n",
+    "message_list.append(next_message)\n",
+    "duckdb_memory.add_chat_messages_to_memory(conversations=message_list, conversation_id=conversation_id)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "duckdb_memory.export_conversation_by_id(conversation_id=conversation_id)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Cleanup DuckDB resources\n",
+    "duckdb_memory.dispose_engine()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pyrit-dev",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}