Skip to content

Commit

Permalink
PR comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
mdwelsh committed Nov 19, 2024
1 parent 88b4149 commit d5880a6
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 24 deletions.
54 changes: 30 additions & 24 deletions apps/query-server/queryserver/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,16 @@
from typing import Annotated, Any, List, Optional, Union

from fastapi import FastAPI, Path
from pydantic import BaseModel
from pydantic import BaseModel, model_validator
from sse_starlette.sse import EventSourceResponse
from sycamore import DocSet
from sycamore.data import Document, MetadataDocument
from sycamore.query.client import SycamoreQueryClient
from sycamore.query.logical_plan import LogicalPlan
from sycamore.query.schema import OpenSearchSchema

# This is the uvicorn general logger, the error name is misleading.
# https://github.com/encode/uvicorn/issues/562
logger = logging.getLogger("uvicorn.error")


Expand Down Expand Up @@ -59,6 +61,14 @@ class Query(BaseModel):
stream: bool = False
"""If true, query results will be streamed back to the client as they are generated."""

@model_validator(mode="after")
def check_not_both_query_and_plan(self):
if self.query is not None and self.plan is not None:
raise ValueError("query and plan cannot both be specified")
if self.query is None and self.plan is None:
raise ValueError("one of query or plan is required")
return self


class QueryResult(BaseModel):
"""The result of a non-streaming query."""
Expand Down Expand Up @@ -115,20 +125,19 @@ async def generate_plan(query: Query) -> LogicalPlan:


def doc_to_json(doc: Document) -> Optional[dict[str, Any]]:
"""Render a Document as a JSON object. Returns None for MetadataDocuments."""
"""Render a Document as a JSON object. Only external properties and truncated text_representation
are included. Returns None for MetadataDocuments."""

NUM_TEXT_CHARS_GENERATE = 1024

if isinstance(doc, MetadataDocument):
return None

props_dict = {}
props_dict.update(doc.properties)
if "_schema" in props_dict:
del props_dict["_schema"]
if "_schema_class" in props_dict:
del props_dict["_schema_class"]
if "_doc_source" in props_dict:
del props_dict["_doc_source"]
props_dict.pop("_schema", None)
props_dict.pop("_schema_class", None)
props_dict.pop("_doc_source", None)
props_dict["text_representation"] = (
doc.text_representation[:NUM_TEXT_CHARS_GENERATE] if doc.text_representation is not None else None
)
Expand All @@ -140,17 +149,19 @@ async def run_query_stream(query: Query) -> EventSourceResponse:

async def query_runner():
try:
logger.info(f"Generating plan for {query.index}: {query.query}")
yield {
"event": "status",
"data": "Generating plan",
}
await asyncio.sleep(0.1)
plan = sqclient.generate_plan(query.query, query.index, sqclient.get_opensearch_schema(query.index))
logger.info(f"Generated plan: {plan}")
# Don't want to return these through the API.
plan.llm_plan = None
plan.llm_prompt = None
plan = query.plan
if plan is None:
logger.info(f"Generating plan for {query.index}: {query.query}")
yield {
"event": "status",
"data": "Generating plan",
}
await asyncio.sleep(0.1)
plan = sqclient.generate_plan(query.query, query.index, sqclient.get_opensearch_schema(query.index))
logger.info(f"Generated plan: {plan}")
# Don't want to return these through the API.
plan.llm_plan = None
plan.llm_prompt = None
yield {
"event": "plan",
"data": plan.model_dump_json(),
Expand Down Expand Up @@ -215,11 +226,6 @@ async def run_query(query: Query) -> Union[EventSourceResponse, QueryResult]:

logger.info(f"Running query: {query}")

if query.query is None and query.plan is None:
raise ValueError("query or plan is required")
if query.query is not None and query.plan is not None:
raise ValueError("query and plan cannot both be specified")

if query.stream:
return await run_query_stream(query)

Expand Down
4 changes: 4 additions & 0 deletions apps/query-server/queryserver/tests/test_queryserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,15 @@ def test_generate_plan_with_plan(test_client):
def test_query_with_no_query_or_plan(test_client):
response = test_client.post("/v1/query", json={})
assert response.status_code == 422
response = test_client.post("/v1/query", json={"stream": True})
assert response.status_code == 422


def test_query_with_query_and_plan(test_client):
response = test_client.post("/v1/query", json={"query": "test query", "plan": "test plan"})
assert response.status_code == 422
response = test_client.post("/v1/query", json={"query": "test query", "plan": "test plan", "stream": True})
assert response.status_code == 422


def test_run_plan(test_client):
Expand Down

0 comments on commit d5880a6

Please sign in to comment.