From b8cda36aa8c573a43fc47300103db6ad2b57938f Mon Sep 17 00:00:00 2001
From: Viduni Wickramarachchi <viduni.wickramarachchi@elastic.co>
Date: Tue, 14 Jan 2025 13:19:18 -0500
Subject: [PATCH] [Obs AI Assistant] Ensure compatibility with the new
 semantic_text format (#206510)

## Summary

Elasticsearch is introducing a breaking change in `8.18` and `9.0.0` to
`semantic_text` fields.
This PR ensures compatibility with this new `semantic_text` format.

Relates to https://github.com/elastic/dev/issues/2936

### Changes made
- Remove the `inference` meta field exclusions when recalling from
semantic search connectors (the `inference` subfield is no longer
returned with the new `semantic_text` format)
- Set the `index.mapping.semantic_text.use_legacy_format` index setting
to false to force the new format for KB indices and evaluation framework
indices.

The following parts of this breaking change do not impact the Obs AI
Assistant:

| Breaking change | Do we use it? | Is there an impact? |
|--------|--------|--------|
| `inner_hits` is removed and `highlight` is introduced | We don't use
`inner_hits` at the moment | No |
| The `semantic_text` field is now returned as a string
(previously it was an object with a `text` property) | Even though we
query via the `semantic_text` field in KB entries, we don't use the
result of the `semantic_text` field in `_source` | No |
| Pre-computed embeddings using a new `_inference_fields` meta field |
Not used | No |


### Checklist

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
- [x] The PR description includes the appropriate Release Notes section,
and the correct `release_note:*` label is applied per the
[guidelines](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)
---
 .../recall_from_search_connectors.ts          |  3 --
 .../setup_conversation_and_kb_index_assets.ts |  1 +
 .../scenarios/elasticsearch/index.spec.ts     |  3 ++
 .../evaluation/scenarios/esql/index.spec.ts   |  6 +++
 .../knowledge_base_migration.spec.ts          | 42 +++++++++++++------
 5 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/x-pack/platform/plugins/shared/observability_solution/observability_ai_assistant/server/service/knowledge_base_service/recall_from_search_connectors.ts b/x-pack/platform/plugins/shared/observability_solution/observability_ai_assistant/server/service/knowledge_base_service/recall_from_search_connectors.ts
index 3001a28f6dbbb..6f55e706152cf 100644
--- a/x-pack/platform/plugins/shared/observability_solution/observability_ai_assistant/server/service/knowledge_base_service/recall_from_search_connectors.ts
+++ b/x-pack/platform/plugins/shared/observability_solution/observability_ai_assistant/server/service/knowledge_base_service/recall_from_search_connectors.ts
@@ -85,9 +85,6 @@ async function recallFromSemanticTextConnectors({
   const params = {
     index: connectorIndices,
     size: 20,
-    _source: {
-      excludes: semanticTextFields.map((field) => `${field}.inference`),
-    },
     query: {
       bool: {
         should: semanticTextFields.flatMap((field) => {
diff --git a/x-pack/platform/plugins/shared/observability_solution/observability_ai_assistant/server/service/setup_conversation_and_kb_index_assets.ts b/x-pack/platform/plugins/shared/observability_solution/observability_ai_assistant/server/service/setup_conversation_and_kb_index_assets.ts
index 30d55400bbbda..b56628ce4b7ee 100644
--- a/x-pack/platform/plugins/shared/observability_solution/observability_ai_assistant/server/service/setup_conversation_and_kb_index_assets.ts
+++ b/x-pack/platform/plugins/shared/observability_solution/observability_ai_assistant/server/service/setup_conversation_and_kb_index_assets.ts
@@ -80,6 +80,7 @@ export async function setupConversationAndKbIndexAssets({
           number_of_shards: 1,
           auto_expand_replicas: '0-1',
           hidden: true,
+          'index.mapping.semantic_text.use_legacy_format': false,
         },
       },
     });
diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts
index 722043dc29e3e..f286b8c0bb662 100644
--- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts
+++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts
@@ -48,6 +48,9 @@ describe('Elasticsearch functions', () => {
               },
             },
           },
+          settings: {
+            'index.mapping.semantic_text.use_legacy_format': false,
+          },
         });
 
         await esClient.index({
diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/esql/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/esql/index.spec.ts
index 23b0af988035c..052cc19cba58b 100644
--- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/esql/index.spec.ts
+++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/esql/index.spec.ts
@@ -72,6 +72,9 @@ describe('ES|QL query generation', () => {
               },
             },
           },
+          settings: {
+            'index.mapping.semantic_text.use_legacy_format': false,
+          },
         });
 
         await esClient.index({
@@ -125,6 +128,9 @@ describe('ES|QL query generation', () => {
               },
             },
           },
+          settings: {
+            'index.mapping.semantic_text.use_legacy_format': false,
+          },
         });
 
         await esClient.index({
diff --git a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/knowledge_base/knowledge_base_migration.spec.ts b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/knowledge_base/knowledge_base_migration.spec.ts
index bc6de98812c98..81fd896124692 100644
--- a/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/knowledge_base/knowledge_base_migration.spec.ts
+++ b/x-pack/test/api_integration/deployment_agnostic/apis/observability/ai_assistant/knowledge_base/knowledge_base_migration.spec.ts
@@ -19,6 +19,27 @@ import {
   TINY_ELSER,
 } from './helpers';
 
+interface InferenceChunk { // one chunk entry: its text and its embeddings
+  text: string;
+  embeddings: any; // left untyped; the visible assertion only reads the chunk count
+}
+
+interface InferenceData { // inference metadata attached to a semantic_text field
+  inference_id: string; // surfaced as `inferenceId` in the migration assertion
+  chunks: {
+    semantic_text: InferenceChunk[]; // chunk list keyed by field name; its length is asserted as `chunkCount`
+  };
+}
+
+interface SemanticTextField { // new-format document shape: the field value is a plain string
+  semantic_text: string;
+  _inference_fields?: { // optional: inference metadata is typed separately from the field value
+    semantic_text?: {
+      inference: InferenceData;
+    };
+  };
+}
+
 export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderContext) {
   const observabilityAIAssistantAPIClient = getService('observabilityAIAssistantApi');
   const esArchiver = getService('esArchiver');
@@ -32,22 +53,18 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon
   async function getKnowledgeBaseEntries() {
     const res = (await es.search({
       index: '.kibana-observability-ai-assistant-kb*',
+      // Add fields parameter to include inference metadata
+      fields: ['_inference_fields'],
       body: {
         query: {
           match_all: {},
         },
       },
-    })) as SearchResponse<
-      KnowledgeBaseEntry & {
-        semantic_text: {
-          text: string;
-          inference: { inference_id: string; chunks: Array<{ text: string; embeddings: any }> };
-        };
-      }
-    >;
+    })) as SearchResponse<KnowledgeBaseEntry & SemanticTextField>;
 
     return res.hits.hits;
   }
+
   // Failing: See https://github.com/elastic/kibana/issues/206474
   describe.skip('When there are knowledge base entries (from 8.15 or earlier) that does not contain semantic_text embeddings', function () {
     // security_exception: action [indices:admin/settings/update] is unauthorized for user [testing-internal] with effective roles [superuser] on restricted indices [.kibana_security_solution_1,.kibana_task_manager_1,.kibana_alerting_cases_1,.kibana_usage_counters_1,.kibana_1,.kibana_ingest_1,.kibana_analytics_1], this action is granted by the index privileges [manage,all]
@@ -100,12 +117,13 @@ export default function ApiTest({ getService }: DeploymentAgnosticFtrProviderCon
 
           expect(
             orderBy(hits, '_source.title').map(({ _source }) => {
-              const { text, inference } = _source?.semantic_text!;
+              const text = _source?.semantic_text;
+              const inference = _source?._inference_fields?.semantic_text?.inference;
 
               return {
-                text,
-                inferenceId: inference.inference_id,
-                chunkCount: inference.chunks.length,
+                text: text ?? '',
+                inferenceId: inference?.inference_id,
+                chunkCount: inference?.chunks?.semantic_text?.length,
               };
             })
           ).to.eql([