Skip to content

Commit

Permalink
[Telemetry] Data: Report dataset information only if there is known m…
Browse files Browse the repository at this point in the history
…etadata
  • Loading branch information
afharo committed Jul 13, 2020
1 parent 60032b8 commit 313c747
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,42 @@ describe('get_data_telemetry', () => {
docCount: 1000,
sizeInBytes: 60,
},
{
name: 'traces-something-default-000002',
datasetName: 'something',
datasetType: 'traces',
packageName: 'some-package',
isECS: true,
docCount: 1000,
sizeInBytes: 60,
},
{
name: 'metrics-something.else-default-000002',
datasetName: 'something.else',
datasetType: 'metrics',
managedBy: 'ingest-manager',
isECS: true,
docCount: 1000,
sizeInBytes: 60,
},
// Filter out if it has datasetName and datasetType but none of the shipper, packageName or managedBy === 'ingest-manager'
{
name: 'some-index-that-should-not-show',
datasetName: 'should-not-show',
datasetType: 'logs',
isECS: true,
docCount: 1000,
sizeInBytes: 60,
},
{
name: 'other-index-that-should-not-show',
datasetName: 'should-not-show-either',
datasetType: 'metrics',
managedBy: 'me',
isECS: true,
docCount: 1000,
sizeInBytes: 60,
},
])
).toStrictEqual([
{
Expand Down Expand Up @@ -138,6 +174,21 @@ describe('get_data_telemetry', () => {
doc_count: 2000,
size_in_bytes: 1060,
},
{
dataset: { name: 'something', type: 'traces' },
package: { name: 'some-package' },
index_count: 1,
ecs_index_count: 1,
doc_count: 1000,
size_in_bytes: 60,
},
{
dataset: { name: 'something.else', type: 'metrics' },
index_count: 1,
ecs_index_count: 1,
doc_count: 1000,
size_in_bytes: 60,
},
]);
});
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ export interface DataTelemetryDocument extends DataTelemetryBasePayload {
name?: string;
type?: DataTelemetryType | 'unknown' | string; // The union of types is to help autocompletion with some known `dataset.type`s
};
package?: {
name: string;
};
shipper?: string;
pattern_name?: DataPatternName;
}
Expand All @@ -44,6 +47,8 @@ export type DataTelemetryPayload = DataTelemetryDocument[];

export interface DataTelemetryIndex {
name: string;
packageName?: string; // Populated by Ingest Manager at `_meta.package.name`
managedBy?: string; // Populated by Ingest Manager at `_meta.managed_by`
datasetName?: string; // To be obtained from `mappings.dataset.name` if it's a constant keyword
datasetType?: string; // To be obtained from `mappings.dataset.type` if it's a constant keyword
shipper?: string; // To be obtained from `_meta.beat` if it's set
Expand All @@ -58,6 +63,7 @@ export interface DataTelemetryIndex {
type AtLeastOne<T, U = { [K in keyof T]: Pick<T, K> }> = Partial<T> & U[keyof U];

type DataDescriptor = AtLeastOne<{
packageName: string;
datasetName: string;
datasetType: string;
shipper: string;
Expand All @@ -67,17 +73,28 @@ type DataDescriptor = AtLeastOne<{
function findMatchingDescriptors({
name,
shipper,
packageName,
managedBy,
datasetName,
datasetType,
}: DataTelemetryIndex): DataDescriptor[] {
// If we already have the data from the indices' mappings...
if ([shipper, datasetName, datasetType].some(Boolean)) {
if (
[shipper, packageName].some(Boolean) ||
(managedBy === 'ingest-manager' && [datasetType, datasetName].some(Boolean))
) {
return [
{
...(shipper && { shipper }),
...(packageName && { packageName }),
...(datasetName && { datasetName }),
...(datasetType && { datasetType }),
} as AtLeastOne<{ datasetName: string; datasetType: string; shipper: string }>, // Using casting here because TS doesn't infer at least one exists from the if clause
} as AtLeastOne<{
packageName: string;
datasetName: string;
datasetType: string;
shipper: string;
}>, // Using casting here because TS doesn't infer at least one exists from the if clause
];
}

Expand Down Expand Up @@ -130,10 +147,17 @@ export function buildDataTelemetryPayload(indices: DataTelemetryIndex[]): DataTe

for (const indexCandidate of indexCandidates) {
const matchingDescriptors = findMatchingDescriptors(indexCandidate);
for (const { datasetName, datasetType, shipper, patternName } of matchingDescriptors) {
const key = `${datasetName}-${datasetType}-${shipper}-${patternName}`;
for (const {
datasetName,
datasetType,
packageName,
shipper,
patternName,
} of matchingDescriptors) {
const key = `${datasetName}-${datasetType}-${packageName}-${shipper}-${patternName}`;
acc.set(key, {
...((datasetName || datasetType) && { dataset: { name: datasetName, type: datasetType } }),
...(packageName && { package: { name: packageName } }),
...(shipper && { shipper }),
...(patternName && { pattern_name: patternName }),
...increaseCounters(acc.get(key), indexCandidate),
Expand Down Expand Up @@ -165,6 +189,12 @@ interface IndexMappings {
mappings: {
_meta?: {
beat?: string;

// Ingest Manager provided metadata
package?: {
name?: string;
};
managed_by?: string; // Typically "ingest-manager"
};
properties: {
dataset?: {
Expand Down Expand Up @@ -204,16 +234,17 @@ export async function getDataTelemetry(callCluster: LegacyAPICaller) {
filterPath: [
// _meta.beat tells the shipper
'*.mappings._meta.beat',
// _meta.package.name tells the Ingest Manager's package
'*.mappings._meta.package.name',
// _meta.managed_by is usually populated by Ingest Manager for the UI to identify it
'*.mappings._meta.managed_by',
// Does it have `ecs.version` in the mappings? => It follows the ECS conventions
'*.mappings.properties.ecs.properties.version.type',

// Disable the fields below because they are still pending to be confirmed:
// https://github.com/elastic/ecs/pull/845
// TODO: Re-enable when the final fields are confirmed
// // If `dataset.type` is a `constant_keyword`, it can be reported as a type
// '*.mappings.properties.dataset.properties.type.value',
// // If `dataset.name` is a `constant_keyword`, it can be reported as the dataset
// '*.mappings.properties.dataset.properties.name.value',
// If `dataset.type` is a `constant_keyword`, it can be reported as a type
'*.mappings.properties.dataset.properties.type.value',
// If `dataset.name` is a `constant_keyword`, it can be reported as the dataset
'*.mappings.properties.dataset.properties.name.value',
],
}),
// GET <index>/_stats/docs,store?level=indices&filter_path=indices.*.total
Expand All @@ -227,24 +258,25 @@ export async function getDataTelemetry(callCluster: LegacyAPICaller) {

const indexNames = Object.keys({ ...indexMappings, ...indexStats?.indices });
const indices = indexNames.map((name) => {
const isECS = !!indexMappings[name]?.mappings?.properties.ecs?.properties.version?.type;
const shipper = indexMappings[name]?.mappings?._meta?.beat;
const datasetName = indexMappings[name]?.mappings?.properties.dataset?.properties.name?.value;
const datasetType = indexMappings[name]?.mappings?.properties.dataset?.properties.type?.value;
const baseIndexInfo = {
name,
isECS: !!indexMappings[name]?.mappings?.properties.ecs?.properties.version?.type,
shipper: indexMappings[name]?.mappings?._meta?.beat,
packageName: indexMappings[name]?.mappings?._meta?.package?.name,
managedBy: indexMappings[name]?.mappings?._meta?.managed_by,
datasetName: indexMappings[name]?.mappings?.properties.dataset?.properties.name?.value,
datasetType: indexMappings[name]?.mappings?.properties.dataset?.properties.type?.value,
};

const stats = (indexStats?.indices || {})[name];
if (stats) {
return {
name,
datasetName,
datasetType,
shipper,
isECS,
...baseIndexInfo,
docCount: stats.total?.docs?.count,
sizeInBytes: stats.total?.store?.size_in_bytes,
};
}
return { name, datasetName, datasetType, shipper, isECS };
return baseIndexInfo;
});
return buildDataTelemetryPayload(indices);
} catch (e) {
Expand Down

0 comments on commit 313c747

Please sign in to comment.