fixed citations when sections selected #3914

Merged · 2 commits · Feb 5, 2025
3 changes: 2 additions & 1 deletion backend/onyx/agents/agent_search/basic/utils.py
@@ -7,6 +7,7 @@

from onyx.agents.agent_search.shared_graph_utils.utils import write_custom_event
from onyx.chat.models import LlmDoc
+from onyx.chat.models import OnyxContext
from onyx.chat.stream_processing.answer_response_handler import AnswerResponseHandler
from onyx.chat.stream_processing.answer_response_handler import CitationResponseHandler
from onyx.chat.stream_processing.answer_response_handler import (
@@ -23,7 +24,7 @@ def process_llm_stream(
    should_stream_answer: bool,
    writer: StreamWriter,
    final_search_results: list[LlmDoc] | None = None,
-    displayed_search_results: list[LlmDoc] | None = None,
+    displayed_search_results: list[OnyxContext] | list[LlmDoc] | None = None,
) -> AIMessageChunk:
    tool_call_chunk = AIMessageChunk(content="")

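Review note: widening `displayed_search_results` to accept `OnyxContext` as well as `LlmDoc` works because downstream citation handling is assumed to read only a shared `document_id` attribute. A minimal structural-typing sketch of that assumption (all names below are illustrative stand-ins, not from the codebase):

```python
from collections.abc import Sequence
from dataclasses import dataclass
from typing import Protocol


class HasDocumentId(Protocol):
    """Illustrative: the one attribute citation mapping is assumed to need."""

    document_id: str


@dataclass
class FakeLlmDoc:
    document_id: str
    content: str


@dataclass
class FakeOnyxContext:
    document_id: str
    chunk_ind: int


def rank_by_document_id(docs: Sequence[HasDocumentId]) -> dict[str, int]:
    # First occurrence of each document_id gets the next 1-indexed rank,
    # regardless of which concrete type carries it.
    ranks: dict[str, int] = {}
    for doc in docs:
        if doc.document_id not in ranks:
            ranks[doc.document_id] = len(ranks) + 1
    return ranks


docs = [FakeOnyxContext("d1", 0), FakeLlmDoc("d2", "text"), FakeOnyxContext("d1", 1)]
print(rank_by_document_id(docs))  # {'d1': 1, 'd2': 2}
```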
@@ -9,6 +9,7 @@
from onyx.agents.agent_search.basic.utils import process_llm_stream
from onyx.agents.agent_search.models import GraphConfig
from onyx.chat.models import LlmDoc
+from onyx.chat.models import OnyxContexts
from onyx.tools.tool_implementations.search.search_tool import (
    SEARCH_DOC_CONTENT_ID,
)
@@ -50,13 +51,11 @@ def basic_use_tool_response(
        if yield_item.id == FINAL_CONTEXT_DOCUMENTS_ID:
            final_search_results = cast(list[LlmDoc], yield_item.response)
        elif yield_item.id == SEARCH_DOC_CONTENT_ID:
-            search_contexts = yield_item.response.contexts
+            search_contexts = cast(OnyxContexts, yield_item.response).contexts
            for doc in search_contexts:
                if doc.document_id not in initial_search_results:
                    initial_search_results.append(doc)

-    initial_search_results = cast(list[LlmDoc], initial_search_results)
-
    new_tool_call_chunk = AIMessageChunk(content="")
    if not agent_config.behavior.skip_gen_ai_answer_generation:
        stream = llm.stream(
@@ -70,7 +69,9 @@
            True,
            writer,
            final_search_results=final_search_results,
-            displayed_search_results=initial_search_results,
+            # when the search tool is called with specific doc ids, initial search
+            # results are not output, but we still want citations to be processed
+            displayed_search_results=initial_search_results or final_search_results,
        )

    return BasicOutput(tool_call_chunk=new_tool_call_chunk)
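Review note: the `or` fallback relies on list truthiness. When the search tool is invoked with specific document IDs preselected, no SEARCH_DOC_CONTENT packets stream out, `initial_search_results` stays empty, and citation processing falls back to `final_search_results`. A minimal sketch of the behavior being relied on (the values are hypothetical):

```python
# Empty when the tool was called with specific doc ids preselected.
initial_search_results: list[str] = []
# Always populated from the FINAL_CONTEXT_DOCUMENTS_ID packet.
final_search_results = ["doc_a", "doc_b"]

# `or` evaluates to the right operand when the left is falsy ([] is falsy),
# so the citation handler always receives a usable display list.
displayed = initial_search_results or final_search_results
assert displayed == ["doc_a", "doc_b"]
```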
3 changes: 2 additions & 1 deletion backend/onyx/chat/answer.py
@@ -183,6 +183,7 @@ def citations_by_subquestion(self) -> dict[SubQuestionKey, list[CitationInfo]]:
        citations_by_subquestion: dict[
            SubQuestionKey, list[CitationInfo]
        ] = defaultdict(list)
+        basic_subq_key = SubQuestionKey(level=BASIC_KEY[0], question_num=BASIC_KEY[1])
        for packet in self.processed_streamed_output:
            if isinstance(packet, CitationInfo):
                if packet.level_question_num is not None and packet.level is not None:
@@ -192,7 +193,7 @@ def citations_by_subquestion(self) -> dict[SubQuestionKey, list[CitationInfo]]:
                        )
                    ].append(packet)
                elif packet.level is None:
-                    citations_by_subquestion[BASIC_SQ_KEY].append(packet)
+                    citations_by_subquestion[basic_subq_key].append(packet)
        return citations_by_subquestion

    def is_cancelled(self) -> bool:
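Review note: constructing the key locally replaces the module-level BASIC_SQ_KEY constant. A minimal sketch of the grouping pattern, with stand-in types (BASIC_KEY is assumed to be a (level, question_num) tuple used for citations that arrive without a level):

```python
from collections import defaultdict
from dataclasses import dataclass


@dataclass(frozen=True)  # frozen -> hashable, so instances can be dict keys
class SubQuestionKey:
    level: int
    question_num: int


BASIC_KEY = (0, 0)  # assumed sentinel; the real value lives in the codebase

basic_subq_key = SubQuestionKey(level=BASIC_KEY[0], question_num=BASIC_KEY[1])
citations_by_subquestion: dict[SubQuestionKey, list[str]] = defaultdict(list)

# Citation packets that stream back with no level are attributed to the basic key.
citations_by_subquestion[basic_subq_key].append("citation_packet")
```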
3 changes: 2 additions & 1 deletion backend/onyx/chat/stream_processing/utils.py
@@ -3,6 +3,7 @@
from pydantic import BaseModel

from onyx.chat.models import LlmDoc
+from onyx.chat.models import OnyxContext
from onyx.context.search.models import InferenceChunk


@@ -11,7 +12,7 @@ class DocumentIdOrderMapping(BaseModel):


def map_document_id_order(
-    chunks: Sequence[InferenceChunk | LlmDoc], one_indexed: bool = True
+    chunks: Sequence[InferenceChunk | LlmDoc | OnyxContext], one_indexed: bool = True
) -> DocumentIdOrderMapping:
    order_mapping = {}
    current = 1 if one_indexed else 0
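Review note: the hunk truncates the loop body, but the function's contract matters for the citation fix: each distinct document_id is assigned its position in display order, so citation numbers line up with what the user sees. An illustrative reimplementation under that assumption (not the actual loop):

```python
def build_order_mapping(document_ids: list[str], one_indexed: bool = True) -> dict[str, int]:
    # Assign each distinct id the next rank in first-seen order.
    order_mapping: dict[str, int] = {}
    current = 1 if one_indexed else 0
    for doc_id in document_ids:
        if doc_id not in order_mapping:
            order_mapping[doc_id] = current
            current += 1
    return order_mapping


assert build_order_mapping(["a", "b", "a"]) == {"a": 1, "b": 2}
assert build_order_mapping(["a", "b"], one_indexed=False) == {"a": 0, "b": 1}
```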
2 changes: 1 addition & 1 deletion backend/onyx/server/query_and_chat/chat_backend.py
@@ -743,7 +743,7 @@ def upload_files_for_chat(
    # to re-extract it every time we send a message
    if file_type == ChatFileType.DOC:
        extracted_text = extract_file_text(
-            file=file_content_io, # use the bytes we already read
+            file=file_content_io,  # use the bytes we already read
            file_name=file.filename or "",
        )
        text_file_id = str(uuid.uuid4())
36 changes: 0 additions & 36 deletions backend/onyx/tools/tool_implementations/search/search_tool.py
@@ -7,7 +7,6 @@
from sqlalchemy.orm import Session

from onyx.chat.chat_utils import llm_doc_from_inference_section
-from onyx.chat.llm_response_handler import LLMCall
from onyx.chat.models import AnswerStyleConfig
from onyx.chat.models import ContextualPruningConfig
from onyx.chat.models import DocumentPruningConfig
@@ -371,41 +370,6 @@ def build_next_prompt(
            prompt_config=self.prompt_config,
        )

-    """Other utility functions"""
-
-    @classmethod
-    def get_search_result(
-        cls, llm_call: LLMCall
-    ) -> tuple[list[LlmDoc], list[LlmDoc]] | None:
-        """
-        Returns the final search results and a map of docs to their original search rank (which is what is displayed to user)
-        """
-        if not llm_call.tool_call_info:
-            return None
-
-        final_search_results = []
-        initial_search_results = []
-
-        for yield_item in llm_call.tool_call_info:
-            if (
-                isinstance(yield_item, ToolResponse)
-                and yield_item.id == FINAL_CONTEXT_DOCUMENTS_ID
-            ):
-                final_search_results = cast(list[LlmDoc], yield_item.response)
-            elif (
-                isinstance(yield_item, ToolResponse)
-                and yield_item.id == SEARCH_DOC_CONTENT_ID
-            ):
-                search_contexts = yield_item.response.contexts
-                # original_doc_search_rank = 1
-                for doc in search_contexts:
-                    if doc.document_id not in initial_search_results:
-                        initial_search_results.append(doc)
-
-        initial_search_results = cast(list[LlmDoc], initial_search_results)
-
-        return final_search_results, initial_search_results


# Allows yielding the same responses as a SearchTool without being a SearchTool.
# SearchTool passed in to allow for access to SearchTool properties.
@@ -1,6 +1,5 @@
import json

-import pytest
import requests

from onyx.configs.constants import MessageType
@@ -66,9 +65,6 @@ def test_send_message_simple_with_history(reset: None) -> None:
    assert found_doc["metadata"]["document_id"] == doc.id


-@pytest.mark.xfail(
-    reason="agent search broke this",
-)
def test_using_reference_docs_with_simple_with_history_api_flow(reset: None) -> None:
    # Creating an admin user (first user created is automatically an admin)
    admin_user: DATestUser = UserManager.create(name="admin_user")
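Review note: dropping the `xfail` marker (and the now-unused `pytest` import) means this integration test is expected to pass again once citations survive section selection. For reference, a minimal sketch of the marker's semantics on a hypothetical test:

```python
import pytest


@pytest.mark.xfail(reason="known bug: citations dropped when sections are selected")
def test_expected_failure() -> None:
    # A failing xfail test is reported as XFAIL rather than a failure;
    # if it unexpectedly passes, pytest reports XPASS.
    assert False
```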