Skip to content

Commit

Permalink
fixed citations when sections selected (#3914)
Browse files Browse the repository at this point in the history
* removed some dead code and fixed citations when a search request is made with sections selected

* fix black formatting issue
  • Loading branch information
evan-danswer authored Feb 5, 2025
1 parent b469a7e commit 29f5f4e
Show file tree
Hide file tree
Showing 7 changed files with 12 additions and 48 deletions.
3 changes: 2 additions & 1 deletion backend/onyx/agents/agent_search/basic/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import LlmDoc
from onyx.chat.models import OnyxContext
from onyx.chat.stream_processing.answer_response_handler import AnswerResponseHandler
from onyx.chat.stream_processing.answer_response_handler import CitationResponseHandler
from onyx.chat.stream_processing.answer_response_handler import (
Expand All @@ -23,7 +24,7 @@ def process_llm_stream(
should_stream_answer: bool,
writer: StreamWriter,
final_search_results: list[LlmDoc] | None = None,
displayed_search_results: list[LlmDoc] | None = None,
displayed_search_results: list[OnyxContext] | list[LlmDoc] | None = None,
) -> AIMessageChunk:
tool_call_chunk = AIMessageChunk(content="")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from onyx.agents.agent_search.basic.utils import process_llm_stream
from onyx.agents.agent_search.models import GraphConfig
from onyx.chat.models import LlmDoc
from onyx.chat.models import OnyxContexts
from onyx.tools.tool_implementations.search.search_tool import (
SEARCH_DOC_CONTENT_ID,
)
Expand Down Expand Up @@ -50,13 +51,11 @@ def basic_use_tool_response(
if yield_item.id == FINAL_CONTEXT_DOCUMENTS_ID:
final_search_results = cast(list[LlmDoc], yield_item.response)
elif yield_item.id == SEARCH_DOC_CONTENT_ID:
search_contexts = yield_item.response.contexts
search_contexts = cast(OnyxContexts, yield_item.response).contexts
for doc in search_contexts:
if doc.document_id not in initial_search_results:
initial_search_results.append(doc)

initial_search_results = cast(list[LlmDoc], initial_search_results)

new_tool_call_chunk = AIMessageChunk(content="")
if not agent_config.behavior.skip_gen_ai_answer_generation:
stream = llm.stream(
Expand All @@ -70,7 +69,9 @@ def basic_use_tool_response(
True,
writer,
final_search_results=final_search_results,
displayed_search_results=initial_search_results,
                # when the search tool is called with specific doc ids, initial search
                # results are not output. But we still want, e.g., citations to be processed.
displayed_search_results=initial_search_results or final_search_results,
)

return BasicOutput(tool_call_chunk=new_tool_call_chunk)
3 changes: 2 additions & 1 deletion backend/onyx/chat/answer.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def citations_by_subquestion(self) -> dict[SubQuestionKey, list[CitationInfo]]:
citations_by_subquestion: dict[
SubQuestionKey, list[CitationInfo]
] = defaultdict(list)
basic_subq_key = SubQuestionKey(level=BASIC_KEY[0], question_num=BASIC_KEY[1])
for packet in self.processed_streamed_output:
if isinstance(packet, CitationInfo):
if packet.level_question_num is not None and packet.level is not None:
Expand All @@ -192,7 +193,7 @@ def citations_by_subquestion(self) -> dict[SubQuestionKey, list[CitationInfo]]:
)
].append(packet)
elif packet.level is None:
citations_by_subquestion[BASIC_SQ_KEY].append(packet)
citations_by_subquestion[basic_subq_key].append(packet)
return citations_by_subquestion

def is_cancelled(self) -> bool:
Expand Down
3 changes: 2 additions & 1 deletion backend/onyx/chat/stream_processing/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from pydantic import BaseModel

from onyx.chat.models import LlmDoc
from onyx.chat.models import OnyxContext
from onyx.context.search.models import InferenceChunk


Expand All @@ -11,7 +12,7 @@ class DocumentIdOrderMapping(BaseModel):


def map_document_id_order(
chunks: Sequence[InferenceChunk | LlmDoc], one_indexed: bool = True
chunks: Sequence[InferenceChunk | LlmDoc | OnyxContext], one_indexed: bool = True
) -> DocumentIdOrderMapping:
order_mapping = {}
current = 1 if one_indexed else 0
Expand Down
2 changes: 1 addition & 1 deletion backend/onyx/server/query_and_chat/chat_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,7 @@ def upload_files_for_chat(
# to re-extract it every time we send a message
if file_type == ChatFileType.DOC:
extracted_text = extract_file_text(
file=file_content_io, # use the bytes we already read
file=file_content_io, # use the bytes we already read
file_name=file.filename or "",
)
text_file_id = str(uuid.uuid4())
Expand Down
36 changes: 0 additions & 36 deletions backend/onyx/tools/tool_implementations/search/search_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from sqlalchemy.orm import Session

from onyx.chat.chat_utils import llm_doc_from_inference_section
from onyx.chat.llm_response_handler import LLMCall
from onyx.chat.models import AnswerStyleConfig
from onyx.chat.models import ContextualPruningConfig
from onyx.chat.models import DocumentPruningConfig
Expand Down Expand Up @@ -371,41 +370,6 @@ def build_next_prompt(
prompt_config=self.prompt_config,
)

"""Other utility functions"""

@classmethod
def get_search_result(
cls, llm_call: LLMCall
) -> tuple[list[LlmDoc], list[LlmDoc]] | None:
"""
        Returns the final search results and a map of docs to their original search rank (which is what is displayed to the user)
"""
if not llm_call.tool_call_info:
return None

final_search_results = []
initial_search_results = []

for yield_item in llm_call.tool_call_info:
if (
isinstance(yield_item, ToolResponse)
and yield_item.id == FINAL_CONTEXT_DOCUMENTS_ID
):
final_search_results = cast(list[LlmDoc], yield_item.response)
elif (
isinstance(yield_item, ToolResponse)
and yield_item.id == SEARCH_DOC_CONTENT_ID
):
search_contexts = yield_item.response.contexts
# original_doc_search_rank = 1
for doc in search_contexts:
if doc.document_id not in initial_search_results:
initial_search_results.append(doc)

initial_search_results = cast(list[LlmDoc], initial_search_results)

return final_search_results, initial_search_results


# Allows yielding the same responses as a SearchTool without being a SearchTool.
# SearchTool passed in to allow for access to SearchTool properties.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json

import pytest
import requests

from onyx.configs.constants import MessageType
Expand Down Expand Up @@ -66,9 +65,6 @@ def test_send_message_simple_with_history(reset: None) -> None:
assert found_doc["metadata"]["document_id"] == doc.id


@pytest.mark.xfail(
reason="agent search broke this",
)
def test_using_reference_docs_with_simple_with_history_api_flow(reset: None) -> None:
# Creating an admin user (first user created is automatically an admin)
admin_user: DATestUser = UserManager.create(name="admin_user")
Expand Down

0 comments on commit 29f5f4e

Please sign in to comment.