Skip to content

Commit

Permalink
[ENH] Layout Query Plan and Executor interface in frontend (#2920)
Browse files Browse the repository at this point in the history
## Description of changes

*Summarize the changes made by this PR.*
 - Improvements & Bug fixes
	 - N/A
 - New functionality
	 - Defines the query operators and query plan, which is the first step towards query pushdown for distributed Chroma.
	 - Defines the executor abstract class, which should execute any variant of a query plan. Also provides dummy implementations for both local and distributed Chroma.

## Test plan
*How are these changes tested?*

- [x] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust

## Documentation Changes
*Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs repository](https://github.com/chroma-core/docs)?*
N/A
  • Loading branch information
Sicheng-Pan authored Oct 14, 2024
1 parent 3ce0afd commit 678da0e
Show file tree
Hide file tree
Showing 6 changed files with 142 additions and 0 deletions.
Empty file added chromadb/execution/__init__.py
Empty file.
19 changes: 19 additions & 0 deletions chromadb/execution/executor/abstract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from abc import abstractmethod
from typing import Sequence
from chromadb.config import Component
from chromadb.execution.expression.plan import CountPlan, GetPlan, KNNPlan
from chromadb.types import MetadataEmbeddingRecord, VectorEmbeddingRecord


class Executor(Component):
    """Abstract component that executes query plans.

    One abstract method is declared per plan type (``CountPlan``,
    ``GetPlan``, ``KNNPlan``); concrete executors must implement all three.
    """

    @abstractmethod
    def count(self, plan: CountPlan) -> int:
        """Execute a count plan and return an integer result."""

    @abstractmethod
    def get(self, plan: GetPlan) -> Sequence[MetadataEmbeddingRecord]:
        """Execute a get plan and return a sequence of metadata records."""

    @abstractmethod
    def knn(self, plan: KNNPlan) -> Sequence[Sequence[VectorEmbeddingRecord]]:
        """Execute a KNN plan and return a sequence of record sequences.

        NOTE(review): presumably one inner sequence per query embedding --
        confirm against the concrete implementations.
        """
27 changes: 27 additions & 0 deletions chromadb/execution/executor/distributed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import Sequence
from overrides import overrides
from chromadb.config import System
from chromadb.execution.executor.abstract import Executor
from chromadb.execution.expression.plan import CountPlan, GetPlan, KNNPlan
from chromadb.segment import SegmentManager
from chromadb.types import MetadataEmbeddingRecord, VectorEmbeddingRecord


class DistributedExecutor(Executor):
    """Placeholder executor for distributed Chroma.

    It requires a ``SegmentManager`` from the system at construction time,
    but every plan method currently ignores its plan and returns an empty
    result.
    """

    _manager: SegmentManager

    def __init__(self, system: System):
        """Initialise the component and resolve its ``SegmentManager``."""
        super().__init__(system)
        self._manager = self.require(SegmentManager)

    @overrides
    def count(self, plan: CountPlan) -> int:
        """Dummy implementation: always returns 0."""
        return 0

    @overrides
    def get(self, plan: GetPlan) -> Sequence[MetadataEmbeddingRecord]:
        """Dummy implementation: always returns an empty list."""
        return []

    @overrides
    def knn(self, plan: KNNPlan) -> Sequence[Sequence[VectorEmbeddingRecord]]:
        """Dummy implementation: always returns an empty list."""
        return []
27 changes: 27 additions & 0 deletions chromadb/execution/executor/local.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import Sequence
from overrides import overrides
from chromadb.config import System
from chromadb.execution.executor.abstract import Executor
from chromadb.execution.expression.plan import CountPlan, GetPlan, KNNPlan
from chromadb.segment import SegmentManager
from chromadb.types import MetadataEmbeddingRecord, VectorEmbeddingRecord


class LocalExecutor(Executor):
    """Placeholder executor for local Chroma.

    It requires a ``SegmentManager`` from the system at construction time,
    but every plan method currently ignores its plan and returns an empty
    result.
    """

    _manager: SegmentManager

    def __init__(self, system: System):
        """Initialise the component and resolve its ``SegmentManager``."""
        super().__init__(system)
        self._manager = self.require(SegmentManager)

    @overrides
    def count(self, plan: CountPlan) -> int:
        """Dummy implementation: always returns 0."""
        return 0

    @overrides
    def get(self, plan: GetPlan) -> Sequence[MetadataEmbeddingRecord]:
        """Dummy implementation: always returns an empty list."""
        return []

    @overrides
    def knn(self, plan: KNNPlan) -> Sequence[Sequence[VectorEmbeddingRecord]]:
        """Dummy implementation: always returns an empty list."""
        return []
45 changes: 45 additions & 0 deletions chromadb/execution/expression/operator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
from dataclasses import dataclass
from typing import Optional

from chromadb.api.types import Embeddings, IDs
from chromadb.types import RequestVersionContext, Where, WhereDocument, Collection


@dataclass
class Scan:
    """Scan operator: carries the collection that a plan operates on."""

    # The collection this operator refers to.
    collection: Collection

    @property
    def version(self) -> RequestVersionContext:
        """Build a ``RequestVersionContext`` from the collection.

        The context is populated from the collection's ``version`` and
        ``log_position`` attributes.
        """
        target = self.collection
        return RequestVersionContext(
            collection_version=target.version,
            log_position=target.log_position,
        )


@dataclass
class Filter:
    """Filter operator: optional criteria attached to a plan.

    Every field defaults to ``None``.
    NOTE(review): ``None`` presumably means "no restriction" for that
    field -- confirm with the executor implementation.
    """

    # Optional list of record IDs.
    user_ids: Optional[IDs] = None
    # Optional metadata predicate.
    where: Optional[Where] = None
    # Optional document-content predicate.
    where_document: Optional[WhereDocument] = None


@dataclass
class KNN:
    """KNN operator: query embeddings plus a fetch count.

    Both fields are required (no defaults).
    """

    # Query embeddings.
    embeddings: Embeddings
    # NOTE(review): presumably the number of neighbours to return per
    # query embedding -- confirm with the executor implementation.
    fetch: int


@dataclass
class Limit:
    """Limit operator: a skip count and an optional fetch count.

    NOTE(review): presumably offset/limit pagination, with ``fetch=None``
    meaning "no upper bound" -- confirm with the executor implementation.
    """

    # Defaults to 0.
    skip: int = 0
    # Defaults to None.
    fetch: Optional[int] = None


@dataclass
class Projection:
    """Projection operator: one boolean flag per record field.

    Every flag defaults to ``False``.
    NOTE(review): presumably a ``True`` flag asks the executor to include
    that field in the results -- confirm with the executor implementation.
    """

    document: bool = False
    embedding: bool = False
    metadata: bool = False
    rank: bool = False
    uri: bool = False
24 changes: 24 additions & 0 deletions chromadb/execution/expression/plan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from dataclasses import dataclass, field

from chromadb.execution.expression.operator import KNN, Filter, Limit, Projection, Scan


@dataclass
class CountPlan:
    """Query plan for a count request; consists of a single ``Scan``."""

    # Required: identifies the collection being counted.
    scan: Scan


@dataclass
class GetPlan:
    """Query plan for a get request.

    Only ``scan`` is required. The other operators are built fresh per
    instance through ``default_factory``, so plans never share a default
    operator object.
    """

    scan: Scan
    # Defaults to Filter() with no criteria set.
    filter: Filter = field(default_factory=Filter)
    # Defaults to Limit() (skip=0, fetch=None).
    limit: Limit = field(default_factory=Limit)
    # Defaults to Projection() with every flag False.
    projection: Projection = field(default_factory=Projection)


@dataclass
class KNNPlan:
    """Query plan for a KNN request.

    ``scan`` and ``knn`` are required. ``filter`` and ``projection`` are
    built fresh per instance through ``default_factory``, so plans never
    share a default operator object.
    """

    scan: Scan
    knn: KNN
    # Defaults to Filter() with no criteria set.
    filter: Filter = field(default_factory=Filter)
    # Defaults to Projection() with every flag False.
    projection: Projection = field(default_factory=Projection)

0 comments on commit 678da0e

Please sign in to comment.