Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Telemetry] Data: Report dataset info only if there is known metadata #71419

Merged
merged 5 commits into from
Aug 3, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ describe('get_data_telemetry', () => {
{ name: 'logs-endpoint.1234', docCount: 0 }, // Matching pattern with a dot in the name
// New Indexing strategy: everything can be inferred from the constant_keyword values
{
name: 'logs-nginx.access-default-000001',
name: '.ds-logs-nginx.access-default-000001',
datasetName: 'nginx.access',
datasetType: 'logs',
shipper: 'filebeat',
Expand All @@ -84,14 +84,50 @@ describe('get_data_telemetry', () => {
sizeInBytes: 1000,
},
{
name: 'logs-nginx.access-default-000002',
name: '.ds-logs-nginx.access-default-000002',
datasetName: 'nginx.access',
datasetType: 'logs',
shipper: 'filebeat',
isECS: true,
docCount: 1000,
sizeInBytes: 60,
},
{
name: '.ds-traces-something-default-000002',
datasetName: 'something',
datasetType: 'traces',
packageName: 'some-package',
isECS: true,
docCount: 1000,
sizeInBytes: 60,
},
{
name: '.ds-metrics-something.else-default-000002',
datasetName: 'something.else',
datasetType: 'metrics',
managedBy: 'ingest-manager',
isECS: true,
docCount: 1000,
sizeInBytes: 60,
},
// Filter out if it has datasetName and datasetType but none of the shipper, packageName or managedBy === 'ingest-manager'
{
name: 'some-index-that-should-not-show',
datasetName: 'should-not-show',
datasetType: 'logs',
isECS: true,
docCount: 1000,
sizeInBytes: 60,
},
{
name: 'other-index-that-should-not-show',
datasetName: 'should-not-show-either',
datasetType: 'metrics',
managedBy: 'me',
isECS: true,
docCount: 1000,
sizeInBytes: 60,
},
])
).toStrictEqual([
{
Expand Down Expand Up @@ -138,6 +174,21 @@ describe('get_data_telemetry', () => {
doc_count: 2000,
size_in_bytes: 1060,
},
{
dataset: { name: 'something', type: 'traces' },
package: { name: 'some-package' },
index_count: 1,
ecs_index_count: 1,
doc_count: 1000,
size_in_bytes: 60,
},
{
dataset: { name: 'something.else', type: 'metrics' },
index_count: 1,
ecs_index_count: 1,
doc_count: 1000,
size_in_bytes: 60,
},
]);
});
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ export interface DataTelemetryDocument extends DataTelemetryBasePayload {
name?: string;
type?: DataTelemetryType | 'unknown' | string; // The union of types is to help autocompletion with some known `dataset.type`s
};
package?: {
name: string;
};
shipper?: string;
pattern_name?: DataPatternName;
}
Expand All @@ -44,6 +47,8 @@ export type DataTelemetryPayload = DataTelemetryDocument[];

export interface DataTelemetryIndex {
name: string;
packageName?: string; // Populated by Ingest Manager at `_meta.package.name`
managedBy?: string; // Populated by Ingest Manager at `_meta.managed_by`
datasetName?: string; // To be obtained from `mappings.dataset.name` if it's a constant keyword
datasetType?: string; // To be obtained from `mappings.dataset.type` if it's a constant keyword
shipper?: string; // To be obtained from `_meta.beat` if it's set
Expand All @@ -58,6 +63,7 @@ export interface DataTelemetryIndex {
type AtLeastOne<T, U = { [K in keyof T]: Pick<T, K> }> = Partial<T> & U[keyof U];

type DataDescriptor = AtLeastOne<{
packageName: string;
datasetName: string;
datasetType: string;
shipper: string;
Expand All @@ -67,17 +73,28 @@ type DataDescriptor = AtLeastOne<{
function findMatchingDescriptors({
name,
shipper,
packageName,
managedBy,
datasetName,
datasetType,
}: DataTelemetryIndex): DataDescriptor[] {
// If we already have the data from the indices' mappings...
if ([shipper, datasetName, datasetType].some(Boolean)) {
if (
[shipper, packageName].some(Boolean) ||
(managedBy === 'ingest-manager' && [datasetType, datasetName].some(Boolean))
) {
return [
{
...(shipper && { shipper }),
...(packageName && { packageName }),
...(datasetName && { datasetName }),
...(datasetType && { datasetType }),
} as AtLeastOne<{ datasetName: string; datasetType: string; shipper: string }>, // Using casting here because TS doesn't infer at least one exists from the if clause
} as AtLeastOne<{
packageName: string;
datasetName: string;
datasetType: string;
shipper: string;
}>, // Using casting here because TS doesn't infer at least one exists from the if clause
];
}

Expand Down Expand Up @@ -122,6 +139,7 @@ export function buildDataTelemetryPayload(indices: DataTelemetryIndex[]): DataTe
({ name }) =>
!(
name.startsWith('.') &&
!name.startsWith('.ds-') && // data_stream-related indices can be included
!startingDotPatternsUntilTheFirstAsterisk.find((pattern) => name.startsWith(pattern))
)
);
Expand All @@ -130,10 +148,17 @@ export function buildDataTelemetryPayload(indices: DataTelemetryIndex[]): DataTe

for (const indexCandidate of indexCandidates) {
const matchingDescriptors = findMatchingDescriptors(indexCandidate);
for (const { datasetName, datasetType, shipper, patternName } of matchingDescriptors) {
const key = `${datasetName}-${datasetType}-${shipper}-${patternName}`;
for (const {
datasetName,
datasetType,
packageName,
shipper,
patternName,
} of matchingDescriptors) {
const key = `${datasetName}-${datasetType}-${packageName}-${shipper}-${patternName}`;
acc.set(key, {
...((datasetName || datasetType) && { dataset: { name: datasetName, type: datasetType } }),
...(packageName && { package: { name: packageName } }),
...(shipper && { shipper }),
...(patternName && { pattern_name: patternName }),
...increaseCounters(acc.get(key), indexCandidate),
Expand Down Expand Up @@ -165,6 +190,12 @@ interface IndexMappings {
mappings: {
_meta?: {
beat?: string;

// Ingest Manager provided metadata
package?: {
name?: string;
};
managed_by?: string; // Typically "ingest-manager"
};
properties: {
dataset?: {
Expand Down Expand Up @@ -195,7 +226,7 @@ export async function getDataTelemetry(callCluster: LegacyAPICaller) {
try {
const index = [
...DATA_DATASETS_INDEX_PATTERNS_UNIQUE.map(({ pattern }) => pattern),
'*-*-*-*', // Include new indexing strategy indices {type}-{dataset}-{namespace}-{rollover_counter}
'*-*-*', // Include data-streams aliases `{type}-{dataset}-{namespace}`
];
const [indexMappings, indexStats]: [IndexMappings, IndexStats] = await Promise.all([
// GET */_mapping?filter_path=*.mappings._meta.beat,*.mappings.properties.ecs.properties.version.type,*.mappings.properties.dataset.properties.type.value,*.mappings.properties.dataset.properties.name.value
Expand All @@ -204,16 +235,17 @@ export async function getDataTelemetry(callCluster: LegacyAPICaller) {
filterPath: [
// _meta.beat tells the shipper
'*.mappings._meta.beat',
// _meta.package.name tells the Ingest Manager's package
'*.mappings._meta.package.name',
// _meta.managed_by is usually populated by Ingest Manager for the UI to identify it
'*.mappings._meta.managed_by',
// Does it have `ecs.version` in the mappings? => It follows the ECS conventions
'*.mappings.properties.ecs.properties.version.type',

// Disable the fields below because they are still pending to be confirmed:
// https://github.com/elastic/ecs/pull/845
// TODO: Re-enable when the final fields are confirmed
// // If `dataset.type` is a `constant_keyword`, it can be reported as a type
// '*.mappings.properties.dataset.properties.type.value',
// // If `dataset.name` is a `constant_keyword`, it can be reported as the dataset
// '*.mappings.properties.dataset.properties.name.value',
// If `dataset.type` is a `constant_keyword`, it can be reported as a type
'*.mappings.properties.dataset.properties.type.value',
// If `dataset.name` is a `constant_keyword`, it can be reported as the dataset
'*.mappings.properties.dataset.properties.name.value',
],
}),
// GET <index>/_stats/docs,store?level=indices&filter_path=indices.*.total
Expand All @@ -227,24 +259,25 @@ export async function getDataTelemetry(callCluster: LegacyAPICaller) {

const indexNames = Object.keys({ ...indexMappings, ...indexStats?.indices });
const indices = indexNames.map((name) => {
const isECS = !!indexMappings[name]?.mappings?.properties.ecs?.properties.version?.type;
const shipper = indexMappings[name]?.mappings?._meta?.beat;
const datasetName = indexMappings[name]?.mappings?.properties.dataset?.properties.name?.value;
const datasetType = indexMappings[name]?.mappings?.properties.dataset?.properties.type?.value;
const baseIndexInfo = {
name,
isECS: !!indexMappings[name]?.mappings?.properties.ecs?.properties.version?.type,
shipper: indexMappings[name]?.mappings?._meta?.beat,
packageName: indexMappings[name]?.mappings?._meta?.package?.name,
managedBy: indexMappings[name]?.mappings?._meta?.managed_by,
datasetName: indexMappings[name]?.mappings?.properties.dataset?.properties.name?.value,
datasetType: indexMappings[name]?.mappings?.properties.dataset?.properties.type?.value,
};

const stats = (indexStats?.indices || {})[name];
if (stats) {
return {
name,
datasetName,
datasetType,
shipper,
isECS,
...baseIndexInfo,
docCount: stats.total?.docs?.count,
sizeInBytes: stats.total?.store?.size_in_bytes,
};
}
return { name, datasetName, datasetType, shipper, isECS };
return baseIndexInfo;
});
return buildDataTelemetryPayload(indices);
} catch (e) {
Expand Down