From 678da0e90492206e1cebc79120c8b71bfe24deaf Mon Sep 17 00:00:00 2001 From: Macronova <60079945+Sicheng-Pan@users.noreply.github.com> Date: Mon, 14 Oct 2024 10:36:32 -0700 Subject: [PATCH] [ENH] Layout Query Plan and Executor interface in frontend (#2920) ## Description of changes *Summarize the changes made by this PR.* - Improvements & Bug fixes - N/A - New functionality - Defines the query operators and query plan, which is the first step towards query pushdown for distributed Chroma. - Defines the executor abstract class, which should execute any variant of query plan. Also provides dummy implementation for both local and distributed chroma. ## Test plan *How are these changes tested?* - [x] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust ## Documentation Changes *Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs repository](https://github.com/chroma-core/docs)?* N/A --- chromadb/execution/__init__.py | 0 chromadb/execution/executor/abstract.py | 19 +++++++++ chromadb/execution/executor/distributed.py | 27 +++++++++++++ chromadb/execution/executor/local.py | 27 +++++++++++++ chromadb/execution/expression/operator.py | 45 ++++++++++++++++++++++ chromadb/execution/expression/plan.py | 24 ++++++++++++ 6 files changed, 142 insertions(+) create mode 100644 chromadb/execution/__init__.py create mode 100644 chromadb/execution/executor/abstract.py create mode 100644 chromadb/execution/executor/distributed.py create mode 100644 chromadb/execution/executor/local.py create mode 100644 chromadb/execution/expression/operator.py create mode 100644 chromadb/execution/expression/plan.py diff --git a/chromadb/execution/__init__.py b/chromadb/execution/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/chromadb/execution/executor/abstract.py b/chromadb/execution/executor/abstract.py new file mode 100644 index 00000000000..c7672d4fa89 --- /dev/null 
+++ b/chromadb/execution/executor/abstract.py
@@ -0,0 +1,19 @@
+from abc import abstractmethod
+from typing import Sequence
+from chromadb.config import Component
+from chromadb.execution.expression.plan import CountPlan, GetPlan, KNNPlan
+from chromadb.types import MetadataEmbeddingRecord, VectorEmbeddingRecord
+
+
+class Executor(Component):  # Interface: one abstract method per query plan type
+    @abstractmethod  # count: takes a CountPlan, returns an int
+    def count(self, plan: CountPlan) -> int:
+        pass  # no behaviour here; the executors in this patch override it
+
+    @abstractmethod  # get: takes a GetPlan, returns a sequence of metadata records
+    def get(self, plan: GetPlan) -> Sequence[MetadataEmbeddingRecord]:
+        pass
+
+    @abstractmethod  # knn: takes a KNNPlan, returns a sequence of sequences of vector records
+    def knn(self, plan: KNNPlan) -> Sequence[Sequence[VectorEmbeddingRecord]]:
+        pass  # NOTE(review): presumably one inner sequence per query embedding - confirm
diff --git a/chromadb/execution/executor/distributed.py b/chromadb/execution/executor/distributed.py
new file mode 100644
index 00000000000..a6a3bc8a452
--- /dev/null
+++ b/chromadb/execution/executor/distributed.py
@@ -0,0 +1,27 @@
+from typing import Sequence
+from overrides import overrides
+from chromadb.config import System
+from chromadb.execution.executor.abstract import Executor
+from chromadb.execution.expression.plan import CountPlan, GetPlan, KNNPlan
+from chromadb.segment import SegmentManager
+from chromadb.types import MetadataEmbeddingRecord, VectorEmbeddingRecord
+
+
+class DistributedExecutor(Executor):  # Placeholder executor: every method returns an empty result
+    _manager: SegmentManager  # assigned in __init__; not read by the methods below
+
+    def __init__(self, system: System):
+        super().__init__(system)
+        self._manager = self.require(SegmentManager)  # require() is inherited, defined outside this patch
+
+    @overrides
+    def count(self, plan: CountPlan) -> int:
+        return 0  # stub: the plan is ignored
+
+    @overrides
+    def get(self, plan: GetPlan) -> Sequence[MetadataEmbeddingRecord]:
+        return list()  # stub: always an empty list
+
+    @overrides
+    def knn(self, plan: KNNPlan) -> Sequence[Sequence[VectorEmbeddingRecord]]:
+        return list()  # stub: always an empty list
diff --git a/chromadb/execution/executor/local.py b/chromadb/execution/executor/local.py
new file mode 100644
index 00000000000..cbccf2c11ae
--- /dev/null
+++ b/chromadb/execution/executor/local.py
@@ -0,0 +1,27 @@
+from typing import Sequence
+from overrides import overrides
+from chromadb.config import System
+from
chromadb.execution.executor.abstract import Executor
+from chromadb.execution.expression.plan import CountPlan, GetPlan, KNNPlan
+from chromadb.segment import SegmentManager
+from chromadb.types import MetadataEmbeddingRecord, VectorEmbeddingRecord
+
+
+class LocalExecutor(Executor):  # Placeholder executor: every method returns an empty result
+    _manager: SegmentManager  # assigned in __init__; not read by the methods below
+
+    def __init__(self, system: System):
+        super().__init__(system)
+        self._manager = self.require(SegmentManager)  # require() is inherited, defined outside this patch
+
+    @overrides
+    def count(self, plan: CountPlan) -> int:
+        return 0  # stub: the plan is ignored
+
+    @overrides
+    def get(self, plan: GetPlan) -> Sequence[MetadataEmbeddingRecord]:
+        return list()  # stub: always an empty list
+
+    @overrides
+    def knn(self, plan: KNNPlan) -> Sequence[Sequence[VectorEmbeddingRecord]]:
+        return list()  # stub: always an empty list
diff --git a/chromadb/execution/expression/operator.py b/chromadb/execution/expression/operator.py
new file mode 100644
index 00000000000..d3a58de16c6
--- /dev/null
+++ b/chromadb/execution/expression/operator.py
@@ -0,0 +1,45 @@
+from dataclasses import dataclass
+from typing import Optional
+
+from chromadb.api.types import Embeddings, IDs
+from chromadb.types import RequestVersionContext, Where, WhereDocument, Collection
+
+
+@dataclass
+class Scan:  # Operator holding the Collection a plan targets
+    collection: Collection
+
+    @property
+    def version(self) -> RequestVersionContext:  # built on each access from the collection's fields
+        return RequestVersionContext(
+            collection_version=self.collection.version,
+            log_position=self.collection.log_position,
+        )
+
+
+@dataclass
+class Filter:  # Operator with three optional criteria, all defaulting to None
+    user_ids: Optional[IDs] = None  # NOTE(review): presumably None means "no restriction" - confirm
+    where: Optional[Where] = None
+    where_document: Optional[WhereDocument] = None
+
+
+@dataclass
+class KNN:  # Operator carrying query embeddings; both fields are required
+    embeddings: Embeddings
+    fetch: int  # NOTE(review): presumably the number of neighbours to return - confirm
+
+
+@dataclass
+class Limit:  # Operator with a skip count and an optional fetch count
+    skip: int = 0  # NOTE(review): presumably an offset into the result set - confirm
+    fetch: Optional[int] = None  # NOTE(review): presumably None means "no cap" - confirm
+
+
+@dataclass
+class Projection:  # Operator of boolean flags, all False by default
+    document: bool = False  # NOTE(review): flags presumably select which fields are returned - confirm
+    embedding: bool = False
+    metadata: bool = False
+    rank: bool = False
+    uri: bool = False
diff --git a/chromadb/execution/expression/plan.py b/chromadb/execution/expression/plan.py
new file mode 100644
index 00000000000..54f45c48704
--- /dev/null
+++
b/chromadb/execution/expression/plan.py
@@ -0,0 +1,24 @@
+from dataclasses import dataclass, field
+
+from chromadb.execution.expression.operator import KNN, Filter, Limit, Projection, Scan
+
+
+@dataclass
+class CountPlan:  # Plan made of a Scan only
+    scan: Scan
+
+
+@dataclass
+class GetPlan:  # Plan made of a required Scan plus optional Filter, Limit and Projection
+    scan: Scan
+    filter: Filter = field(default_factory=Filter)  # default_factory gives each plan its own default instance
+    limit: Limit = field(default_factory=Limit)
+    projection: Projection = field(default_factory=Projection)
+
+
+@dataclass
+class KNNPlan:  # Plan made of a required Scan and KNN plus optional Filter and Projection; no Limit field
+    scan: Scan
+    knn: KNN
+    filter: Filter = field(default_factory=Filter)  # default_factory gives each plan its own default instance
+    projection: Projection = field(default_factory=Projection)