From 1f4cbbb30964ae8fc63137ca443c2b122def9681 Mon Sep 17 00:00:00 2001 From: Kawika Avilla Date: Thu, 29 Aug 2024 15:31:56 -0700 Subject: [PATCH] [discover] S3 data connection (#7917) Update the S3 type config. This commit does: Browsing connections, databases, tables Creating a dataset from a table and setting the string correctly. This commit does NOT: Cache data structures Use session ID for querying Re-add async polling to search interceptor --------- Signed-off-by: Kawika Avilla Co-authored-by: opensearch-changeset-bot[bot] <154024398+opensearch-changeset-bot[bot]@users.noreply.github.com> --- changelogs/fragments/7917.yml | 2 + src/plugins/data/common/constants.ts | 7 +- .../dataset_service/dataset_service.ts | 7 +- .../dataset_service/lib/index_pattern_type.ts | 4 +- .../dataset_service/lib/index_type.ts | 20 +- .../query_string/dataset_service/types.ts | 6 +- .../ui/dataset_selector/advanced_selector.tsx | 8 +- .../ui/dataset_selector/dataset_explorer.tsx | 8 +- .../ui/dataset_selector/dataset_selector.tsx | 8 +- .../public/datasets/s3_handler.ts | 246 ++++++++++++++---- 10 files changed, 226 insertions(+), 90 deletions(-) create mode 100644 changelogs/fragments/7917.yml diff --git a/changelogs/fragments/7917.yml b/changelogs/fragments/7917.yml new file mode 100644 index 000000000000..60262ad04c44 --- /dev/null +++ b/changelogs/fragments/7917.yml @@ -0,0 +1,2 @@ +feat: +- Add S3 data exploration for connections, databases, and tables ([#7917](https://github.com/opensearch-project/OpenSearch-Dashboards/pull/7917)) \ No newline at end of file diff --git a/src/plugins/data/common/constants.ts b/src/plugins/data/common/constants.ts index 97c465762986..289d56d4d064 100644 --- a/src/plugins/data/common/constants.ts +++ b/src/plugins/data/common/constants.ts @@ -38,10 +38,15 @@ export const DEFAULT_DATA = { type: 'ROOT', meta: { type: DATA_STRUCTURE_META_TYPES.FEATURE, - icon: 'folderOpen', + icon: { type: 'folderOpen' }, tooltip: 'Root Data Structure', }, } as DataStructure, + LOCAL_DATASOURCE: { + id: '', + title: 'Local Cluster', + type: 'DATA_SOURCE', + }, }, SET_TYPES: { diff --git a/src/plugins/data/public/query/query_string/dataset_service/dataset_service.ts b/src/plugins/data/public/query/query_string/dataset_service/dataset_service.ts index ee4ab700e847..faa35328075f 100644 --- a/src/plugins/data/public/query/query_string/dataset_service/dataset_service.ts +++ b/src/plugins/data/public/query/query_string/dataset_service/dataset_service.ts @@ -3,7 +3,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { CoreStart, SavedObjectsClientContract } from 'opensearch-dashboards/public'; +import { CoreStart } from 'opensearch-dashboards/public'; import { Dataset, DataStructure, @@ -15,6 +15,7 @@ import { import { DatasetTypeConfig } from './types'; import { indexPatternTypeConfig, indexTypeConfig } from './lib'; import { IndexPatternsContract } from '../../../index_patterns'; +import { IDataPluginServices } from '../../../types'; export class DatasetService { private indexPatterns?: IndexPatternsContract; @@ -83,7 +84,7 @@ export class DatasetService { } public fetchOptions( - savedObjects: SavedObjectsClientContract, + services: IDataPluginServices, path: DataStructure[], dataType: string ): Promise { @@ -91,7 +92,7 @@ export class DatasetService { if (!type) { throw new Error(`No handler found for type: ${path[0]}`); } - return type.fetch(savedObjects, path); + return type.fetch(services, path); } private async fetchDefaultDataset(): Promise { diff --git a/src/plugins/data/public/query/query_string/dataset_service/lib/index_pattern_type.ts b/src/plugins/data/public/query/query_string/dataset_service/lib/index_pattern_type.ts index 1034850c41ba..b92fb70631f6 100644 --- a/src/plugins/data/public/query/query_string/dataset_service/lib/index_pattern_type.ts +++ b/src/plugins/data/public/query/query_string/dataset_service/lib/index_pattern_type.ts @@ -44,9 +44,9 @@ export const indexPatternTypeConfig: DatasetTypeConfig = { } as Dataset; }, - fetch: async (savedObjects, path) => { + fetch: async (services, path) => { const dataStructure = path[path.length - 1]; - const indexPatterns = await fetchIndexPatterns(savedObjects); + const indexPatterns = await fetchIndexPatterns(services.savedObjects.client); return { ...dataStructure, columnHeader: 'Index patterns', diff --git a/src/plugins/data/public/query/query_string/dataset_service/lib/index_type.ts b/src/plugins/data/public/query/query_string/dataset_service/lib/index_type.ts index fe18afbd2e76..73df7b1af556 100644 --- a/src/plugins/data/public/query/query_string/dataset_service/lib/index_type.ts +++ b/src/plugins/data/public/query/query_string/dataset_service/lib/index_type.ts @@ -15,14 +15,6 @@ import { DatasetTypeConfig } from '../types'; import { getSearchService, getIndexPatterns } from '../../../../services'; import { injectMetaToDataStructures } from './utils'; -const INDEX_INFO = { - LOCAL_DATASOURCE: { - id: '', - title: 'Local Cluster', - type: 'DATA_SOURCE', - }, -}; - export const indexTypeConfig: DatasetTypeConfig = { id: DEFAULT_DATA.SET_TYPES.INDEX, title: 'Indexes', @@ -47,11 +39,11 @@ export const indexTypeConfig: DatasetTypeConfig = { title: dataSource.title, type: dataSource.type, } - : INDEX_INFO.LOCAL_DATASOURCE, + : DEFAULT_DATA.STRUCTURES.LOCAL_DATASOURCE, }; }, - fetch: async (savedObjects, path) => { + fetch: async (services, path) => { const dataStructure = path[path.length - 1]; switch (dataStructure.type) { case 'DATA_SOURCE': { @@ -69,7 +61,7 @@ export const indexTypeConfig: DatasetTypeConfig = { } default: { - const dataSources = await fetchDataSources(savedObjects); + const dataSources = await fetchDataSources(services.savedObjects.client); return { ...dataStructure, columnHeader: 'Cluster', @@ -97,12 +89,12 @@ export const indexTypeConfig: DatasetTypeConfig = { }; const fetchDataSources = async (client: SavedObjectsClientContract) => { - const resp = await client.find({ + const response = await client.find({ type: 'data-source', perPage: 10000, }); - const dataSources: DataStructure[] = [INDEX_INFO.LOCAL_DATASOURCE].concat( - resp.savedObjects.map((savedObject) => ({ + const dataSources: DataStructure[] = [DEFAULT_DATA.STRUCTURES.LOCAL_DATASOURCE].concat( + response.savedObjects.map((savedObject) => ({ id: savedObject.id, title: savedObject.attributes.title, type: 'DATA_SOURCE', diff --git a/src/plugins/data/public/query/query_string/dataset_service/types.ts b/src/plugins/data/public/query/query_string/dataset_service/types.ts index 8d597694ac49..0ea84d67f2a0 100644 --- a/src/plugins/data/public/query/query_string/dataset_service/types.ts +++ b/src/plugins/data/public/query/query_string/dataset_service/types.ts @@ -2,9 +2,9 @@ * Copyright OpenSearch Contributors * SPDX-License-Identifier: Apache-2.0 */ -import { SavedObjectsClientContract } from 'opensearch-dashboards/public'; import { EuiIconProps } from '@elastic/eui'; import { Dataset, DatasetField, DataStructure } from '../../../../common'; +import { IDataPluginServices } from '../../../types'; /** * Configuration for handling dataset operations. @@ -29,11 +29,11 @@ export interface DatasetTypeConfig { toDataset: (path: DataStructure[]) => Dataset; /** * Fetches child options for a given DataStructure. - * @param {SavedObjectsClientContract} client - The saved objects client. + * @param {IDataPluginServices} services - The data plugin services. * @param {DataStructure} dataStructure - The parent DataStructure. * @returns {Promise} A promise that resolves to a DatasetHandlerFetchResponse. */ - fetch: (client: SavedObjectsClientContract, path: DataStructure[]) => Promise; + fetch: (services: IDataPluginServices, path: DataStructure[]) => Promise; /** * Fetches fields for the dataset. * @returns {Promise} A promise that resolves to an array of DatasetFields. diff --git a/src/plugins/data/public/ui/dataset_selector/advanced_selector.tsx b/src/plugins/data/public/ui/dataset_selector/advanced_selector.tsx index 8afaedbb492e..734153452eea 100644 --- a/src/plugins/data/public/ui/dataset_selector/advanced_selector.tsx +++ b/src/plugins/data/public/ui/dataset_selector/advanced_selector.tsx @@ -4,7 +4,6 @@ */ import React, { useState } from 'react'; -import { SavedObjectsClientContract } from 'opensearch-dashboards/public'; import { BaseDataset, DATA_STRUCTURE_META_TYPES, @@ -15,13 +14,14 @@ import { import { DatasetExplorer } from './dataset_explorer'; import { Configurator } from './configurator'; import { getQueryService } from '../../services'; +import { IDataPluginServices } from '../../types'; export const AdvancedSelector = ({ - savedObjects, + services, onSelect, onCancel, }: { - savedObjects: SavedObjectsClientContract; + services: IDataPluginServices; onSelect: (dataset: Dataset) => void; onCancel: () => void; }) => { @@ -59,7 +59,7 @@ export const AdvancedSelector = ({ /> ) : ( void; @@ -55,7 +55,7 @@ export const DatasetExplorer = ({ } setLoading(true); - const nextDataStructure = await typeConfig.fetch(savedObjects, nextPath); + const nextDataStructure = await typeConfig.fetch(services, nextPath); setLoading(false); setPath([...newPath, nextDataStructure]); diff --git a/src/plugins/data/public/ui/dataset_selector/dataset_selector.tsx b/src/plugins/data/public/ui/dataset_selector/dataset_selector.tsx index 0a251a8dfe3e..d442360e4d59 100644 --- a/src/plugins/data/public/ui/dataset_selector/dataset_selector.tsx +++ b/src/plugins/data/public/ui/dataset_selector/dataset_selector.tsx @@ -34,7 +34,7 @@ export const DatasetSelector = ({ }: DatasetSelectorProps) => { const [isOpen, setIsOpen] = useState(false); const [datasets, setDatasets] = useState([]); - const { overlays, savedObjects } = services; + const { overlays } = services; const isMounted = useRef(true); @@ -53,7 +53,7 @@ export const DatasetSelector = ({ const typeConfig = datasetService.getType(DEFAULT_DATA.SET_TYPES.INDEX_PATTERN); if (!typeConfig) return; - const fetchedIndexPatternDataStructures = await typeConfig.fetch(savedObjects.client, []); + const fetchedIndexPatternDataStructures = await typeConfig.fetch(services, []); if (!isMounted.current) return; @@ -67,7 +67,7 @@ export const DatasetSelector = ({ if (!selectedDataset && fetchedDatasets.length > 0) { setSelectedDataset(fetchedDatasets[0]); } - }, [datasetService, savedObjects.client, selectedDataset, setSelectedDataset]); + }, [datasetService, selectedDataset, services, setSelectedDataset]); useEffect(() => { fetchDatasets(); @@ -183,7 +183,7 @@ export const DatasetSelector = ({ const overlay = overlays?.openModal( toMountPoint( { overlay?.close(); if (dataset) { diff --git a/src/plugins/query_enhancements/public/datasets/s3_handler.ts b/src/plugins/query_enhancements/public/datasets/s3_handler.ts index 2613f016143f..f54435df293f 100644 --- a/src/plugins/query_enhancements/public/datasets/s3_handler.ts +++ b/src/plugins/query_enhancements/public/datasets/s3_handler.ts @@ -3,111 +3,247 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { SavedObjectsClientContract } from 'opensearch-dashboards/public'; -import { DataStructure, Dataset, DatasetField } from 'src/plugins/data/common'; -import { DatasetTypeConfig } from 'src/plugins/data/public'; +import { HttpSetup, SavedObjectsClientContract } from 'opensearch-dashboards/public'; +import { timer } from 'rxjs'; +import { filter, map, mergeMap, takeWhile } from 'rxjs/operators'; +import { + DATA_STRUCTURE_META_TYPES, + DEFAULT_DATA, + DataSourceMeta, + DataStructure, + DataStructureCustomMeta, + Dataset, + DatasetField, +} from '../../../data/common'; +import { DatasetTypeConfig, IDataPluginServices } from '../../../data/public'; import { DATASET } from '../../common'; const S3_ICON = 'visTable'; export const s3TypeConfig: DatasetTypeConfig = { id: DATASET.S3, - title: DATASET.S3, + title: 'S3 Connections', meta: { icon: { type: S3_ICON }, - tooltip: 'S3 Data Source', + tooltip: 'Amazon S3 Connections', }, toDataset: (path: DataStructure[]): Dataset => { - const s3 = path[path.length - 1]; - const dataSource = path.find((ds) => ds.type === DATASET.S3); + const dataSource = path.find((ds) => ds.type === 'DATA_SOURCE'); + const connection = path.find((ds) => ds.type === 'CONNECTION'); + const database = path.find((ds) => ds.type === 'DATABASE'); + const table = path[path.length - 1]; return { - id: s3.id, - title: s3.title, + id: table.id, + title: `${connection?.title}.${database?.title}.${table.title}`, type: DATASET.S3, dataSource: dataSource ? { id: dataSource.id, title: dataSource.title, type: dataSource.type, + meta: table.meta as DataSourceMeta, } - : undefined, + : DEFAULT_DATA.STRUCTURES.LOCAL_DATASOURCE, }; }, - fetch: async ( - savedObjects: SavedObjectsClientContract, - path: DataStructure[] - ): Promise => { + fetch: async (services: IDataPluginServices, path: DataStructure[]): Promise => { const dataStructure = path[path.length - 1]; + const { + http, + savedObjects: { client }, + } = services; + switch (dataStructure.type) { - case DATASET.S3: + case 'DATA_SOURCE': { + const connections = await fetchConnections(http, dataStructure); return { ...dataStructure, - columnHeader: 'Connections', hasNext: true, - children: [ - { - id: `${dataStructure.id}::mys3`, - title: 'mys3', - type: 'CONNECTION', - }, - ], + columnHeader: 'Connections', + children: connections, }; - case 'CONNECTION': + } + case 'CONNECTION': { + const databases = await fetchDatabases(http, path); return { ...dataStructure, columnHeader: 'Databases', hasNext: true, - children: [ - { - id: `${dataStructure.id}.defaultDb`, - title: 'defaultDb', - type: 'DATABASE', - }, - ], + children: databases, }; - case 'DATABASE': + } + case 'DATABASE': { + const tables = await fetchTables(http, path); return { ...dataStructure, columnHeader: 'Tables', hasNext: false, - children: [ - { - id: `${dataStructure.id}.table1`, - title: 'table1', - type: 'TABLE', - }, - { - id: `${dataStructure.id}.table2`, - title: 'table2', - type: 'TABLE', - }, - ], + children: tables, }; - default: - const s3DataSources = await fetchS3DataSources(savedObjects); + } + default: { + const dataSources = await fetchDataSources(client); return { ...dataStructure, - columnHeader: 'S3 Data Sources', - hasNext: false, - children: s3DataSources, + columnHeader: 'Clusters', + hasNext: true, + children: dataSources, }; + } } }, fetchFields: async (dataset: Dataset): Promise => { - // This is a placeholder. You'll need to implement the actual logic to fetch S3 fields. - // For now, we'll return an empty array. return []; }, - supportedLanguages: (): string[] => { - return ['sql']; // Assuming S3 only supports SQL queries + supportedLanguages: (dataset: Dataset): string[] => { + return ['SQL']; }, }; -const fetchS3DataSources = async (client: SavedObjectsClientContract): Promise => { - return []; +const fetch = ( + http: HttpSetup, + path: DataStructure[], + type: 'DATABASE' | 'TABLE' +): Promise => { + return new Promise((resolve, reject) => { + const dataSource = path.find((ds) => ds.type === 'DATA_SOURCE'); + const parent = path[path.length - 1]; + const meta = parent.meta as DataStructureCustomMeta; + + timer(0, 5000) + .pipe( + mergeMap(() => + http.fetch('../../api/enhancements/datasource/jobs', { + query: { + id: dataSource?.id, + queryId: meta.query.id, + }, + }) + ), + takeWhile( + (response) => response.status !== 'SUCCESS' && response.status !== 'FAILED', + true + ), + filter((response) => response.status === 'SUCCESS'), + map((response) => { + if (response.status === 'FAILED') { + throw new Error('Job failed'); + } + return response.datarows.map((item: string[]) => ({ + id: `${parent.id}.${item[type === 'DATABASE' ? 0 : 1]}`, + title: item[type === 'DATABASE' ? 0 : 1], + type, + meta: { + type: DATA_STRUCTURE_META_TYPES.CUSTOM, + query: meta.query, + session: meta.session, + } as DataStructureCustomMeta, + })); + }) + ) + .subscribe({ + next: (dataStructures) => { + resolve(dataStructures); + }, + error: (error) => { + reject(error); + }, + complete: () => { + reject(new Error('No response')); + }, + }); + }); +}; + +const setMeta = (dataStructure: DataStructure, response: any) => { + return { + ...dataStructure.meta, + query: { id: response.queryId }, + session: { id: response.sessionId }, + } as DataStructureCustomMeta; +}; + +const fetchDataSources = async (client: SavedObjectsClientContract): Promise => { + const resp = await client.find({ + type: 'data-source', + perPage: 10000, + }); + const dataSources: DataStructure[] = [DEFAULT_DATA.STRUCTURES.LOCAL_DATASOURCE]; + return dataSources.concat( + resp.savedObjects.map((savedObject) => ({ + id: savedObject.id, + title: savedObject.attributes.title, + type: 'DATA_SOURCE', + meta: { + query: { + id: savedObject.id, + }, + type: DATA_STRUCTURE_META_TYPES.CUSTOM, + } as DataStructureCustomMeta, + })) + ); +}; + +const fetchConnections = async ( + http: HttpSetup, + dataSource: DataStructure +): Promise => { + const query = (dataSource.meta as DataStructureCustomMeta).query; + const response = await http.fetch(`../../api/enhancements/datasource/external`, { + query, + }); + + return response + .filter((ds: any) => ds.connector === 'S3GLUE') + .map((ds: any) => ({ + id: `${dataSource.id}::${ds.name}`, + title: ds.name, + type: 'CONNECTION', + meta: { + query, + type: DATA_STRUCTURE_META_TYPES.CUSTOM, + } as DataStructureCustomMeta, + })); +}; + +const fetchDatabases = async (http: HttpSetup, path: DataStructure[]): Promise => { + const dataSource = path.find((ds) => ds.type === 'DATA_SOURCE'); + const connection = path[path.length - 1]; + const query = (connection.meta as DataStructureCustomMeta).query; + const response = await http.post(`../../api/enhancements/datasource/jobs`, { + body: JSON.stringify({ + lang: 'sql', + query: `SHOW DATABASES in ${connection.title}`, + datasource: dataSource?.title, + }), + query, + }); + + connection.meta = setMeta(connection, response); + + return fetch(http, path, 'DATABASE'); +}; + +const fetchTables = async (http: HttpSetup, path: DataStructure[]): Promise => { + const dataSource = path.find((ds) => ds.type === 'DATA_SOURCE'); + const database = path[path.length - 1]; + const response = await http.post(`../../api/enhancements/datasource/jobs`, { + body: JSON.stringify({ + lang: 'sql', + query: `SHOW TABLES in ${database.title}`, + datasource: dataSource?.title, + }), + query: { + id: dataSource?.id, + }, + }); + + database.meta = setMeta(database, response); + + return fetch(http, path, 'TABLE'); };