Skip to content

Commit

Permalink
Migrate JSON history cache to database solution
Browse files Browse the repository at this point in the history
This patch introduces a migration from the old JSON format to a database
format. This allows us to switch between SQLite, PostgreSQL, and MySQL,
giving the user more autonomy, security, and control over their database.
  • Loading branch information
r0x0d committed Jan 13, 2025
1 parent a1cab7b commit 493ff52
Show file tree
Hide file tree
Showing 34 changed files with 1,086 additions and 905 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ repos:
- responses
- tomli; python_version<"3.11"
- setuptools
- sqlalchemy

- repo: https://github.com/gitleaks/gitleaks
rev: v8.22.1
Expand Down
78 changes: 70 additions & 8 deletions command_line_assistant/config/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,67 @@

import dataclasses
from pathlib import Path
from typing import Union
from typing import Optional, Union


@dataclasses.dataclass
class DatabaseSchema:
    """This class represents the [history.database] section of our config.toml file.

    Attributes:
        type (str): Database backend; one of 'sqlite', 'mysql' or 'postgresql'.
        host (Optional[str]): Server host name (not used for SQLite).
        database (Optional[str]): Database name. For SQLite, the path of the
            database file used when `connection_string` is not given.
        port (Optional[int]): Server port. Defaults to 3306 for MySQL and 5432
            for PostgreSQL when left unset.
        user (Optional[str]): User name (not used for SQLite).
        password (Optional[str]): Password (not used for SQLite).
        connection_string (Optional[Union[str, Path]]): Path of the SQLite
            database file. Normalized to a user-expanded `Path` when set.
    """

    type: str = "sqlite"  # 'sqlite', 'mysql', 'postgresql', etc.
    host: Optional[str] = None
    database: Optional[str] = None
    port: Optional[int] = None  # Optional for SQLite as it doesn't require host or port
    user: Optional[str] = None  # Optional for SQLite
    password: Optional[str] = None  # Optional for SQLite
    connection_string: Optional[Union[str, Path]] = (
        None  # Some databases like SQLite can use a file path
    )

    def __post_init__(self):
        """Post initialization method to validate and normalize values.

        Raises:
            ValueError: If `type` is not one of the supported databases.
        """
        allowed_databases = ("mysql", "sqlite", "postgresql")
        if self.type not in allowed_databases:
            raise ValueError(
                f"The database type must be one of {','.join(allowed_databases)}, not {self.type}"
            )

        if self.connection_string:
            self.connection_string = Path(self.connection_string).expanduser()
        elif self.type == "sqlite" and self.database:
            # Store a plain file path here, never a URL: get_connection_url()
            # is the single place that adds the "sqlite:///" scheme prefix.
            self.connection_string = Path(self.database).expanduser()

        # Fill in the conventional server port when none was configured.
        if self.type == "mysql" and not self.port:
            self.port = 3306  # Default MySQL port
        elif self.type == "postgresql" and not self.port:
            self.port = 5432  # Default PostgreSQL port

    def _network_location(self) -> str:
        """Build the ``user:password@host:port`` part of a server URL.

        Unset parts are left out, and the credentials are percent-encoded so
        characters such as ``@`` or ``/`` cannot corrupt the URL.
        """
        from urllib.parse import quote_plus

        credentials = ""
        if self.user:
            credentials = quote_plus(self.user)
            if self.password:
                credentials += f":{quote_plus(self.password)}"
            credentials += "@"

        location = f"{credentials}{self.host or ''}"
        if self.port:
            location += f":{self.port}"
        return location

    def get_connection_url(self) -> str:
        """Construct the connection URL for the configured database.

        For SQLite the URL points at the `connection_string` file path; when
        no path is configured at all, the in-memory URL ``sqlite://`` is
        returned.

        Raises:
            ValueError: In case the type is not recognized

        Returns:
            str: The URL formatted connection
        """
        if self.type == "sqlite":
            if not self.connection_string:
                return "sqlite://"
            return f"sqlite:///{self.connection_string}"

        if self.type in ("mysql", "postgresql"):
            return f"{self.type}://{self._network_location()}/{self.database or ''}"

        # Reachable only if `type` was reassigned after initialization.
        raise ValueError(f"Unsupported database type: {self.type}")


@dataclasses.dataclass
Expand Down Expand Up @@ -83,13 +143,15 @@ class HistorySchema:
"""

enabled: bool = True
file: Union[str, Path] = Path( # type: ignore
"/var/lib/command-line-assistant/history.json"
)
database: DatabaseSchema = dataclasses.field(default_factory=DatabaseSchema)

def __post_init__(self):
"""Post initialization method to normalize values"""
self.file: Path = Path(self.file).expanduser()

# Database may be present in the config.toml. If it is not, we don't do
# anything and go with defaults.
if isinstance(self.database, dict):
self.database = DatabaseSchema(**self.database)

Check warning on line 154 in command_line_assistant/config/schemas.py

View check run for this annotation

Codecov / codecov/patch

command_line_assistant/config/schemas.py#L154

Added line #L154 was not covered by tests


@dataclasses.dataclass
Expand All @@ -108,8 +170,8 @@ class AuthSchema:

def __post_init__(self) -> None:
"""Post initialization method to normalize values"""
self.cert_file = Path(self.cert_file)
self.key_file = Path(self.key_file)
self.cert_file = Path(self.cert_file).expanduser()
self.key_file = Path(self.key_file).expanduser()


@dataclasses.dataclass
Expand All @@ -122,7 +184,7 @@ class BackendSchema:
"""

endpoint: str = "http://0.0.0.0:8080"
auth: Union[dict, AuthSchema] = dataclasses.field(default_factory=AuthSchema)
auth: AuthSchema = dataclasses.field(default_factory=AuthSchema)

def __post_init__(self):
"""Post initialization method to normalize values"""
Expand Down
1 change: 1 addition & 0 deletions command_line_assistant/daemon/database/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Main module to hold anything related to databases."""
170 changes: 170 additions & 0 deletions command_line_assistant/daemon/database/manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
"""Database module to handle SQLAlchemy connections and interactions."""

import logging
import uuid
from contextlib import contextmanager
from typing import Generator, Optional, TypeVar

from sqlalchemy import create_engine
from sqlalchemy.engine import Engine
from sqlalchemy.orm import Session, sessionmaker
from sqlalchemy.pool import StaticPool

from command_line_assistant.config import Config
from command_line_assistant.daemon.database.models.base import BaseModel

logger = logging.getLogger(__name__)


# Type variable for ORM models
T = TypeVar("T")


class DatabaseError(Exception):
    """Root of the exception hierarchy raised by the database layer."""


class ConnectionError(DatabaseError):
    """Raised when the engine or the tables cannot be created.

    Note: inside this module the name shadows the builtin ``ConnectionError``.
    """


class QueryError(DatabaseError):
    """Raised when a session operation or query fails."""


class DatabaseManager:
    """Class to handle database operations using SQLAlchemy.

    The manager owns one engine, built from ``config.history.database``, and a
    session factory bound to it. Every helper (`add`, `query`, `get`) runs in
    its own short-lived session obtained from `session`.
    """

    def __init__(self, config: Config, echo: bool = False) -> None:
        """Initialize database connection.

        Args:
            config (Config): Application configuration; its
                ``history.database`` section supplies the connection settings.
            echo (bool): Enable SQL query logging if True

        Raises:
            ConnectionError: When the engine cannot be created
        """
        self._config = config
        self._engine: Engine = self._create_engine(echo)
        # expire_on_commit=False: `add`, `query` and `get` hand instances to
        # the caller after their session has been committed and closed. With
        # the default (True) every attribute is expired on commit, and reading
        # one on the detached instance raises DetachedInstanceError.
        self._session_factory = sessionmaker(
            bind=self._engine, expire_on_commit=False
        )

    def _create_engine(self, echo: bool) -> Engine:
        """Create SQLAlchemy engine with proper settings.

        Args:
            echo (bool): Enable SQL query logging if True

        Returns:
            Engine: Configured SQLAlchemy engine

        Raises:
            ConnectionError: When invalid database settings are provided
        """
        try:
            connection_url = self._config.history.database.get_connection_url()

            # SQLite-specific settings
            connect_args = {}
            if self._config.history.database.type == "sqlite":
                # A single shared connection (StaticPool) that may be used
                # from threads other than the one that created it.
                connect_args["check_same_thread"] = False
                return create_engine(
                    connection_url,
                    echo=echo,
                    poolclass=StaticPool,
                    connect_args=connect_args,
                )

            # For other databases, use standard pooling
            return create_engine(
                connection_url,
                echo=echo,
                pool_pre_ping=True,
                pool_size=5,
                max_overflow=10,
            )
        except Exception as e:
            logger.error("Failed to create database engine: %s", e)
            raise ConnectionError(f"Could not create database engine: {e}") from e

    def connect(self) -> None:
        """Create database tables if they don't exist.

        Raises:
            ConnectionError: If the tables cannot be created
        """
        try:
            BaseModel.metadata.create_all(self._engine)
        except Exception as e:
            logger.error("Failed to create database tables: %s", e)
            raise ConnectionError(f"Could not create tables: {e}") from e

    @contextmanager
    def session(self) -> Generator[Session, None, None]:
        """Create a contextual database session.

        The session is committed when the ``with`` body finishes normally,
        rolled back when it raises, and closed in both cases.

        Yields:
            Session: SQLAlchemy session object

        Raises:
            QueryError: If session operations fail
        """
        session = self._session_factory()
        try:
            yield session
            session.commit()
        except Exception as e:
            session.rollback()
            logger.error("Database session error: %s", e)
            raise QueryError(f"Session error: {e}") from e
        finally:
            session.close()

    def add(self, instance: T) -> None:
        """Add an instance to the database.

        Args:
            instance (T): SQLAlchemy model instance to add

        Raises:
            QueryError: If adding fails
        """
        try:
            with self.session() as session:
                session.add(instance)
                session.flush()
        except Exception as e:
            logger.error("Failed to add instance: %s", e)
            raise QueryError(f"Failed to add instance: {e}") from e

    def query(self, model: type[T]) -> list[T]:
        """Query all instances of a model.

        Args:
            model (type[T]): SQLAlchemy model class to query

        Returns:
            list[T]: List of model instances

        Raises:
            QueryError: If query fails
        """
        try:
            with self.session() as session:
                return session.query(model).all()
        except Exception as e:
            logger.error("Failed to query instances: %s", e)
            raise QueryError(f"Failed to query instances: {e}") from e

    def get(self, model: type[T], id: uuid.UUID) -> Optional[T]:
        """Get a single instance by ID.

        Args:
            model (type[T]): SQLAlchemy model class
            id (uuid.UUID): Instance ID to get

        Returns:
            Optional[T]: Model instance if found, None otherwise

        Raises:
            QueryError: If query fails
        """
        try:
            with self.session() as session:
                return session.query(model).get(id)
        except Exception as e:
            logger.error("Failed to get instance: %s", e)
            raise QueryError(f"Failed to get instance: {e}") from e
1 change: 1 addition & 0 deletions command_line_assistant/daemon/database/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Main module to hold the database models"""
6 changes: 6 additions & 0 deletions command_line_assistant/daemon/database/models/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""Base module to hold the declarative base for sqlalchemy models"""

from sqlalchemy.ext.declarative import declarative_base

#: The declarative base model for SQLAlchemy models. ORM model classes
#: subclass it, and its ``metadata`` is what the database manager passes to
#: ``create_all`` to create the tables.
# NOTE(review): newer SQLAlchemy releases expose ``declarative_base`` from
# ``sqlalchemy.orm``; confirm the supported version before changing the import.
BaseModel = declarative_base()
43 changes: 43 additions & 0 deletions command_line_assistant/daemon/database/models/history.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""Module containing SQLAlchemy models for the daemon."""

import uuid
from datetime import datetime

from sqlalchemy import Column, DateTime, ForeignKey, Integer, String
from sqlalchemy.dialects.postgresql import UUID
from sqlalchemy.orm import relationship

from command_line_assistant.daemon.database.models.base import BaseModel


class HistoryModel(BaseModel):
    """SQLAlchemy model for history table that maps to HistoryEntry dataclass.

    Each row references exactly one row of the ``interaction`` table through
    `interaction_id`. `deleted_at` is a nullable timestamp.
    """

    __tablename__ = "history"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)
    # Pass the callable, not its result: `datetime.utcnow()` would be evaluated
    # once at import time and every row would get the process start time.
    timestamp = Column(DateTime, default=datetime.utcnow)
    deleted_at = Column(DateTime, nullable=True)

    # Relationships
    interaction_id = Column(
        UUID(as_uuid=True), ForeignKey("interaction.id"), nullable=False
    )
    interaction = relationship("InteractionModel", backref="history")


class InteractionModel(BaseModel):
    """ORM mapping for the ``interaction`` table.

    A row stores one query/response pair together with the session it belongs
    to and the operating system it was issued from. ``HistoryModel`` rows point
    at this table through their ``interaction_id`` foreign key.
    """

    # NOTE(review): UUID comes from the PostgreSQL dialect while the config
    # also allows sqlite and mysql -- confirm it works on those backends.

    __tablename__ = "interaction"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Query side of the exchange.
    query_text = Column(String)
    query_role = Column(String, default="user")

    # Response side of the exchange.
    response_text = Column(String)
    response_role = Column(String, default="assistant")
    response_tokens = Column(Integer, default=0)

    # A fresh UUID is generated when the caller supplies no session id.
    session_id = Column(UUID(as_uuid=True), nullable=False, default=uuid.uuid4)

    # Operating system details; version and architecture are mandatory.
    os_distribution = Column(String, default="RHEL")
    os_version = Column(String, nullable=False)
    os_arch = Column(String, nullable=False)
Loading

0 comments on commit 493ff52

Please sign in to comment.