From b8cda36aa8c573a43fc47300103db6ad2b57938f Mon Sep 17 00:00:00 2001 From: Viduni Wickramarachchi Date: Tue, 14 Jan 2025 13:19:18 -0500 Subject: [PATCH] [Obs AI Assistant] Ensure compatibility with the new semantic_text format (#206510) ## Summary Elasticsearch is introducing a breaking change in `8.18` and `9.0.0` to `semantic_text` fields. This PR ensures compatibility with this new `semantic_text` format. Relates to https://github.com/elastic/dev/issues/2936 ### Changes made - Remove the `inference` meta field exclusions in recalling from semantic search connectors (the `inference` subfield won't be returned anymore with the new `semantic_text` format) - Set the `index.mapping.semantic_text.use_legacy_format` index setting to false to force the new format for KB indices and evaluation framework indices. The following does not impact Obs AI Assistant as a part of this breaking change: | Breaking change | Do we use it? | Is there an impact? | |--------|--------|--------| | `inner_hits` is removed and `highlight` is introduced | We don't use `inner_hits` at the moment | No | | The shape of semantic text field return type is updated to a string (previously it was an object with a `text` property) | Even though we query via the `semantic_text` field in KB entries, we don't use the result of the `semantic_text` field in `_source` | No | | pre-computed embeddings using a new `_inference_fields` metafield | Not used | No | ### Checklist - [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios - [x] The PR description includes the appropriate Release Notes section, and the correct `release_note:*` label is applied per the [guidelines](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process) --- .../recall_from_search_connectors.ts | 3 -- .../setup_conversation_and_kb_index_assets.ts | 1 + .../scenarios/elasticsearch/index.spec.ts | 
3 ++ .../evaluation/scenarios/esql/index.spec.ts | 6 +++ .../knowledge_base_migration.spec.ts | 42 +++++++++++++------ 5 files changed, 40 insertions(+), 15 deletions(-) diff --git a/x-pack/platform/plugins/shared/observability_solution/observability_ai_assistant/server/service/knowledge_base_service/recall_from_search_connectors.ts b/x-pack/platform/plugins/shared/observability_solution/observability_ai_assistant/server/service/knowledge_base_service/recall_from_search_connectors.ts index 3001a28f6dbbb..6f55e706152cf 100644 --- a/x-pack/platform/plugins/shared/observability_solution/observability_ai_assistant/server/service/knowledge_base_service/recall_from_search_connectors.ts +++ b/x-pack/platform/plugins/shared/observability_solution/observability_ai_assistant/server/service/knowledge_base_service/recall_from_search_connectors.ts @@ -85,9 +85,6 @@ async function recallFromSemanticTextConnectors({ const params = { index: connectorIndices, size: 20, - _source: { - excludes: semanticTextFields.map((field) => `${field}.inference`), - }, query: { bool: { should: semanticTextFields.flatMap((field) => { diff --git a/x-pack/platform/plugins/shared/observability_solution/observability_ai_assistant/server/service/setup_conversation_and_kb_index_assets.ts b/x-pack/platform/plugins/shared/observability_solution/observability_ai_assistant/server/service/setup_conversation_and_kb_index_assets.ts index 30d55400bbbda..b56628ce4b7ee 100644 --- a/x-pack/platform/plugins/shared/observability_solution/observability_ai_assistant/server/service/setup_conversation_and_kb_index_assets.ts +++ b/x-pack/platform/plugins/shared/observability_solution/observability_ai_assistant/server/service/setup_conversation_and_kb_index_assets.ts @@ -80,6 +80,7 @@ export async function setupConversationAndKbIndexAssets({ number_of_shards: 1, auto_expand_replicas: '0-1', hidden: true, + 'index.mapping.semantic_text.use_legacy_format': false, }, }, }); diff --git 
a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts index 722043dc29e3e..f286b8c0bb662 100644 --- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts +++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts @@ -48,6 +48,9 @@ describe('Elasticsearch functions', () => { }, }, }, + settings: { + 'index.mapping.semantic_text.use_legacy_format': false, + }, }); await esClient.index({ diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/esql/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/esql/index.spec.ts index 23b0af988035c..052cc19cba58b 100644 --- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/esql/index.spec.ts +++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/esql/index.spec.ts @@ -72,6 +72,9 @@ describe('ES|QL query generation', () => { }, }, }, + settings: { + 'index.mapping.semantic_text.use_legacy_format': false, + }, }); await esClient.index({ @@ -125,6 +128,9 @@ describe('ES|QL query generation', () => { }, }, }, + settings: { + 'index.mapping.semantic_text.use_legacy_format': false, + }, }); await esClient.index({ diff --git a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/knowledge_base/knowledge_base_migration.spec.ts b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/knowledge_base/knowledge_base_migration.spec.ts index bc6de98812c98..81fd896124692 100644 --- 
a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/knowledge_base/knowledge_base_migration.spec.ts +++ b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/knowledge_base/knowledge_base_migration.spec.ts @@ -19,6 +19,27 @@ import { TINY_ELSER, } from './helpers'; +interface InferenceChunk { + text: string; + embeddings: any; +} + +interface InferenceData { + inference_id: string; + chunks: { + semantic_text: InferenceChunk[]; + }; +} + +interface SemanticTextField { + semantic_text: string; + _inference_fields?: { + semantic_text?: { + inference: InferenceData; + }; + }; +} + export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderContext) { const observabilityAIAssistantAPIClient = getService('observabilityAIAssistantApi'); const esArchiver = getService('esArchiver'); @@ -32,22 +53,18 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon async function getKnowledgeBaseEntries() { const res = (await es.search({ index: '.kibana-observability-ai-assistant-kb*', + // Add fields parameter to include inference metadata + fields: ['_inference_fields'], body: { query: { match_all: {}, }, }, - })) as SearchResponse< - KnowledgeBaseEntry & { - semantic_text: { - text: string; - inference: { inference_id: string; chunks: Array<{ text: string; embeddings: any }> }; - }; - } - >; + })) as SearchResponse; return res.hits.hits; } + // Failing: See https://github.com/elastic/kibana/issues/206474 describe.skip('When there are knowledge base entries (from 8.15 or earlier) that does not contain semantic_text embeddings', function () { // security_exception: action [indices:admin/settings/update] is unauthorized for user [testing-internal] with effective roles [superuser] on restricted indices [.kibana_security_solution_1,.kibana_task_manager_1,.kibana_alerting_cases_1,.kibana_usage_counters_1,.kibana_1,.kibana_ingest_1,.kibana_analytics_1], this action is granted by the 
index privileges [manage,all] @@ -100,12 +117,13 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon expect( orderBy(hits, '_source.title').map(({ _source }) => { - const { text, inference } = _source?.semantic_text!; + const text = _source?.semantic_text; + const inference = _source?._inference_fields?.semantic_text?.inference; return { - text, - inferenceId: inference.inference_id, - chunkCount: inference.chunks.length, + text: text ?? '', + inferenceId: inference?.inference_id, + chunkCount: inference?.chunks?.semantic_text?.length, }; }) ).to.eql([