Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(ingest): Refactor structured logging to support infos, warnings, and failures structured reporting to UI #10828

Merged
Show file tree
Hide file tree
Changes from 36 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
4a637b0
Adding structured log reporting to ingestion framework:
Jul 2, 2024
856731c
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 2, 2024
5bf5d73
Adding final reporting method support
Jul 2, 2024
5461cff
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 2, 2024
7d580e5
Yeah
Jul 2, 2024
fd7357a
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 2, 2024
b8e5382
Adding refactoring
Jul 2, 2024
b5bfe6c
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 2, 2024
bd4b3ff
Adding title, making literalstring requirement
Jul 2, 2024
47445c8
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 2, 2024
e897ede
type -> title
hsheth2 Jul 2, 2024
a667cf8
Fix final occurrences of type
Jul 2, 2024
da0739e
Adding prettier and supporting new log fields from ingest
Jul 2, 2024
023ad85
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 2, 2024
839389b
add structured logs type
hsheth2 Jul 3, 2024
6504bd4
Merge branch 'jj--add-structured-logging-to-ingestion' of ssh://githu…
hsheth2 Jul 3, 2024
9cd2035
Test failures pause
Jul 3, 2024
41757e6
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 3, 2024
35a8bdc
fix logs
hsheth2 Jul 3, 2024
fb47657
Adding test fixes
Jul 3, 2024
17d1ddb
Fixing json
Jul 3, 2024
41d817e
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 3, 2024
99e7f14
Adding frontend handling
Jul 3, 2024
a31173c
fix lossy list in report
hsheth2 Jul 3, 2024
7b08229
Merge branch 'jj--add-structured-logging-to-ingestion' of ssh://githu…
hsheth2 Jul 3, 2024
3f4c9b2
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 3, 2024
3c18e1a
fix failure reporting bug
hsheth2 Jul 3, 2024
c2f2aff
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 3, 2024
662c6d0
Final touches to make things work
Jul 3, 2024
d72da4a
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 3, 2024
2dfef8d
Fix mode tests
Jul 3, 2024
6547a5a
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 3, 2024
245aa4b
Redshift to DataHub
Jul 3, 2024
d6650a2
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 3, 2024
a1adbe9
Addressing comments
Jul 3, 2024
aabe311
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 3, 2024
0eaa0a9
Adding source utils
Jul 3, 2024
01d0263
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 3, 2024
cadc4f6
Fix the build
Jul 3, 2024
3ac4b42
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 3, 2024
49d3774
frontend lint
Jul 4, 2024
8fc1b39
Merge remote-tracking branch 'acryl/jj--add-structured-logging-to-ing…
Jul 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 3 additions & 159 deletions datahub-web-react/src/app/ingest/source/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,71 +16,16 @@ export enum StructuredReportItemLevel {
INFO,
}

/**
* A set of standard / well-supported warning or error types.
*
* No member has an explicit initializer, so this is a numeric enum:
* SCAN_UNAUTHORIZED is 0 and each following member is one greater.
*/
export enum StructuredReportItemType {
/**
* Unauthorized to scan a specific part of the source (e.g. a database, schema, or project) or a specific asset.
*/
SCAN_UNAUTHORIZED,
/**
* Unauthorized to access lineage information.
*/
LINEAGE_UNAUTHORIZED,
/**
* Unauthorized to access usage information (recent queries).
*/
USAGE_UNAUTHORIZED,
/**
* Unauthorized to profile some tables.
*/
PROFILING_UNAUTHORIZED,
/**
* Failure to parse some queries to extract column or asset-level lineage.
*/
LINEAGE_QUERY_PARSING_FAILED,
/**
* Failure to parse some queries while extracting usage.
*/
USAGE_QUERY_PARSING_FAILED,
/**
* Failure to connect to the data source due to malformed connection details.
*/
CONNECTION_FAILED_COORDINATES,
/**
* Failure to connect to the data source due to bad credentials.
*/
CONNECTION_FAILED_CREDENTIALS,
/**
* Failure to connect to the data source due to unavailability of a 3rd party service.
*/
CONNECTION_FAILED_SERVICE_UNAVAILABLE,
/**
* Failure to connect to the data source due to a client-side timeout.
*/
CONNECTION_FAILED_SERVICE_TIMEOUT,
/**
* Failure to connect to the data source for an unknown reason.
*/
CONNECTION_FAILED_UNKNOWN,
/**
* Fallback type for unrecognized structured report lines.
*/
UNKNOWN,
}

/**
* A type describing an individual warning / failure item in a structured report.
*
* TODO: Determine whether we need a message field to be reported!
*/
export interface StructuredReportItem {
export interface StructuredReportLogEntry {
level: StructuredReportItemLevel; // The "log level"
title: string; // The "well-supported" or standardized title
title?: string; // The "well-supported" or standardized title
message: string; // The message to display associated with the error.
context: string[]; // The context of WHERE the issue was encountered, as a string.
rawType: string; // The "raw type" string received from the ingestion backend.
}

/**
Expand All @@ -90,106 +35,5 @@ export interface StructuredReport {
infoCount: number;
errorCount: number;
warnCount: number;
items: StructuredReportItem[];
items: StructuredReportLogEntry[];
}

/**
* Display details for each frontend standardized item type: a short title, a longer message,
* and the raw backend type strings (`rawTypes`) that should be mapped onto that type.
*
* Every `rawTypes` list in this table is currently empty, so no raw backend type is mapped
* through it yet.
*/
export const STRUCTURED_REPORT_ITEM_DISPLAY_DETAILS = [
// Fallback entry: unlike the others it carries only a title (no `message`, no `rawTypes`).
{
type: StructuredReportItemType.UNKNOWN,
title: 'An unexpected issue occurred',
},
// Authorization problems.
{
type: StructuredReportItemType.SCAN_UNAUTHORIZED,
title: 'Unauthorized to scan some assets',
message: 'The provided credential details were unauthorized to scan some assets in the data source.',
rawTypes: [],
},
{
type: StructuredReportItemType.LINEAGE_UNAUTHORIZED,
title: 'Unauthorized to extract some lineage',
message:
'The provided credential details were unauthorized to extract some asset lineage from the data source.',
rawTypes: [],
},
{
type: StructuredReportItemType.USAGE_UNAUTHORIZED,
title: 'Unauthorized to extract some usage',
message:
'The provided credential details were unauthorized to extract some asset usage information from the data source.',
rawTypes: [],
},
{
type: StructuredReportItemType.PROFILING_UNAUTHORIZED,
title: 'Unauthorized to extract some data statistics',
message:
'The provided credential details were unauthorized to extract some asset profiles or statistics from the data source.',
rawTypes: [],
},
// Query parsing problems.
{
type: StructuredReportItemType.LINEAGE_QUERY_PARSING_FAILED,
title: 'Failed to extract some lineage',
message: 'Failed to extract lineage for some assets due to failed query parsing.',
rawTypes: [],
},
{
type: StructuredReportItemType.USAGE_QUERY_PARSING_FAILED,
title: 'Failed to extract some usage',
message: 'Failed to extract usage or popularity for some assets due to failed query parsing.',
rawTypes: [],
},
// Connection problems.
{
type: StructuredReportItemType.CONNECTION_FAILED_COORDINATES,
title: 'Failed to connect using provided details',
message:
'Failed to connect to data source. Unable to establish a connection to the specified service. Please check the connection details.',
rawTypes: [],
},
{
type: StructuredReportItemType.CONNECTION_FAILED_CREDENTIALS,
title: 'Failed to connect using provided credentials',
message:
'Failed to connect to data source. Unable to authenticate with the specified service using the provided credentials. Please check the connection credentials.',
rawTypes: [],
},
{
type: StructuredReportItemType.CONNECTION_FAILED_SERVICE_UNAVAILABLE,
title: 'Service unavailable',
message: 'Failed to connect to the data source. The service is currently unavailable.',
rawTypes: [],
},
{
type: StructuredReportItemType.CONNECTION_FAILED_SERVICE_TIMEOUT,
title: 'Service timeout',
message:
'Failed to connect to the data source. A timeout was encountered when attempting to extract data from the data source.',
rawTypes: [],
},
{
type: StructuredReportItemType.CONNECTION_FAILED_UNKNOWN,
title: 'Unknown connection error',
message: 'Failed to connect to the data source for an unknown reason. Please check the connection details.',
rawTypes: [],
},
];

/**
 * Lookup from a raw backend type string to the display-details entry that lists it in `rawTypes`.
 * Entries without a `rawTypes` list contribute nothing.
 */
export const STRUCTURED_REPORT_ITEM_RAW_TYPE_TO_DETAILS = new Map();
for (const entry of STRUCTURED_REPORT_ITEM_DISPLAY_DETAILS) {
    for (const raw of entry.rawTypes || []) {
        STRUCTURED_REPORT_ITEM_RAW_TYPE_TO_DETAILS.set(raw, entry);
    }
}

/**
 * Lookup from a standardized item type to its display-details entry.
 */
export const STRUCTURED_REPORT_ITEM_TYPE_TO_DETAILS = new Map();
for (const entry of STRUCTURED_REPORT_ITEM_DISPLAY_DETAILS) {
    STRUCTURED_REPORT_ITEM_TYPE_TO_DETAILS.set(entry.type, entry);
}
115 changes: 47 additions & 68 deletions datahub-web-react/src/app/ingest/source/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,7 @@ import EntityRegistry from '../../entity/EntityRegistry';
import { ANTD_GRAY, REDESIGN_COLORS } from '../../entity/shared/constants';
import { capitalizeFirstLetterOnly, pluralize } from '../../shared/textUtil';
import { SourceConfig } from './builder/types';
import {
STRUCTURED_REPORT_ITEM_RAW_TYPE_TO_DETAILS,
STRUCTURED_REPORT_ITEM_TYPE_TO_DETAILS,
StructuredReport,
StructuredReportItem,
StructuredReportItemLevel,
StructuredReportItemType,
} from './types';
import { StructuredReport, StructuredReportLogEntry, StructuredReportItemLevel } from './types';

export const getSourceConfigs = (ingestionSources: SourceConfig[], sourceType: string) => {
const sourceConfigs = ingestionSources.find((source) => source.name === sourceType);
Expand Down Expand Up @@ -135,7 +128,9 @@ export const getExecutionRequestStatusDisplayColor = (status: string) => {
export const validateURL = (fieldName: string) => {
return {
validator(_, value) {
const URLPattern = new RegExp(/^(?:http(s)?:\/\/)?[\w.-]+(?:\.[a-zA-Z0-9.-]{2,})+[\w\-._~:/?#[\]@!$&'()*+,;=.]+$/);
const URLPattern = new RegExp(
/^(?:http(s)?:\/\/)?[\w.-]+(?:\.[a-zA-Z0-9.-]{2,})+[\w\-._~:/?#[\]@!$&'()*+,;=.]+$/,
);
const isURLValid = URLPattern.test(value);
if (!value || isURLValid) {
return Promise.resolve();
Expand All @@ -145,36 +140,12 @@ export const validateURL = (fieldName: string) => {
};
};

/**
 * Resolves a raw backend type string to a StructuredReportItemType by matching its
 * upper-cased form against the enum's member names.
 *
 * @param rawType the type string received from the ingestion backend.
 * @returns the matching enum member, or StructuredReportItemType.UNKNOWN when no member has that name.
 */
const tryMapRawTypeToStructuredTypeByName = (rawType: string): StructuredReportItemType => {
    const normalizedType = rawType.toLocaleUpperCase();
    // The key assertion is unchecked, so at runtime this lookup can yield a number (a real member),
    // undefined (no such key), or a string (a numeric key such as "0" hits the enum's reverse mapping).
    const candidate = StructuredReportItemType[normalizedType as keyof typeof StructuredReportItemType];
    // Accept only numeric results. A truthiness fallback (`||`) would wrongly discard the member
    // whose value is 0 and would let reverse-mapped name strings through.
    return typeof candidate === 'number' ? candidate : StructuredReportItemType.UNKNOWN;
};

/**
 * Determines the standardized item type for a raw backend type string.
 * An explicit raw-type mapping wins; otherwise the type is resolved by enum member name.
 */
const getStructuredReportItemType = (rawType: string): StructuredReportItemType => {
    if (STRUCTURED_REPORT_ITEM_RAW_TYPE_TO_DETAILS.has(rawType)) {
        return STRUCTURED_REPORT_ITEM_RAW_TYPE_TO_DETAILS.get(rawType).type;
    }
    return tryMapRawTypeToStructuredTypeByName(rawType);
};

/**
 * Looks up the display title for a raw backend type string via its standardized item type.
 * Yields undefined when no details entry is registered for the resolved type.
 */
const getStructuredReportItemTitle = (rawType: string): string => {
    const details = STRUCTURED_REPORT_ITEM_TYPE_TO_DETAILS.get(getStructuredReportItemType(rawType));
    return details?.title;
};

const getStructuredReportItemLevel = (rawLevel: string) => {
const normalizedLevel = rawLevel.toLocaleUpperCase();
return StructuredReportItemLevel[normalizedLevel as keyof typeof StructuredReportItemType];
};

const getStructuredReportItemMessage = (rawType: string): string => {
const stdType = getStructuredReportItemType(rawType);
return StructuredReportItemType.UNKNOWN ? rawType : STRUCTURED_REPORT_ITEM_TYPE_TO_DETAILS.get(stdType)?.message;
return StructuredReportItemLevel[normalizedLevel as keyof typeof StructuredReportItemLevel];
};

const createStructuredReport = (items: StructuredReportItem[]): StructuredReport => {
const createStructuredReport = (items: StructuredReportLogEntry[]): StructuredReport => {
const errorCount = items.filter((item) => item.level === StructuredReportItemLevel.ERROR).length;
const warnCount = items.filter((item) => item.level === StructuredReportItemLevel.WARN).length;
const infoCount = items.filter((item) => item.level === StructuredReportItemLevel.INFO).length;
Expand All @@ -191,61 +162,69 @@ const transformToStructuredReport = (structuredReportObj: any): StructuredReport
return null;
}

/* Legacy help function to map backend failure or warning ingestion objects into StructuredReportItems */
/* Legacy helper function to map backend failure or warning ingestion objects into StructuredReportLogEntry[] */
const mapItemObject = (
items: { [key: string]: string[] },
level: StructuredReportItemLevel,
): StructuredReportItem[] => {
return Object.entries(items).map(([rawType, context]) => ({
): StructuredReportLogEntry[] => {
return Object.entries(items).map(([rawMessage, context]) => ({
level,
title: getStructuredReportItemTitle(rawType),
message: getStructuredReportItemMessage(rawType),
title: 'An unexpected issue occurred',
message: rawMessage,
context,
rawType,
}));
};

/* V2 help function to map backend failure or warning lists into StructuredReportItems */
const mapItemArray = (items): StructuredReportItem[] => {
return items.map((item) => ({
level: getStructuredReportItemLevel(item.level),
title: getStructuredReportItemTitle(item.type),
message: !item.message ? getStructuredReportItemMessage(item.type) : item.message,
context: item.context,
rawType: item.type,
}));
/* V2 helper function to map backend failure or warning lists into StructuredReportLogEntry[] */
const mapItemArray = (items, level: StructuredReportItemLevel): StructuredReportLogEntry[] => {
return items
.map((item) => {
if (typeof item === 'string') {
// Handle "sampled from" case..
return null;
}

return {
level,
title: item.title || 'An unexpected issue occurred',
message: item.message,
context: item.context,
};
})
.filter((item) => item != null);
};

const sourceReport = structuredReportObj.source?.report;

if (!sourceReport) {
return null;
}
try {
const sourceReport = structuredReportObj.source?.report;

// extract the report.
let structuredReport: StructuredReport;
if (!sourceReport) {
return null;
}

if (sourceReport.structured_logs) {
// If the report has NEW structured logs fields, use that field.
structuredReport = createStructuredReport(mapItemArray(sourceReport.structured_logs || []));
} else {
// Else fallback to using the legacy fields
const failures = sourceReport.failure_list
const failures = Array.isArray(sourceReport.failures)
? /* Use V2 failureList if present */
mapItemArray(sourceReport.failure_list || [])
mapItemArray(sourceReport.failures || [], StructuredReportItemLevel.ERROR)
: /* Else use the legacy object type */
mapItemObject(sourceReport.failures || {}, StructuredReportItemLevel.ERROR);

const warnings = sourceReport.warning_list
const warnings = Array.isArray(sourceReport.warnings)
? /* Use V2 warning if present */
mapItemArray(sourceReport.warning_list || [])
mapItemArray(sourceReport.warnings || [], StructuredReportItemLevel.WARN)
: /* Else use the legacy object type */
mapItemObject(sourceReport.warnings || {}, StructuredReportItemLevel.WARN);

structuredReport = createStructuredReport([...failures, ...warnings]);
}
const infos = Array.isArray(sourceReport.infos)
? /* Use V2 infos if present */
mapItemArray(sourceReport.infos || [], StructuredReportItemLevel.INFO)
: /* Else use the legacy object type */
mapItemObject(sourceReport.infos || {}, StructuredReportItemLevel.INFO);

return structuredReport;
return createStructuredReport([...failures, ...warnings, ...infos]);
} catch (e) {
console.warn('Failed to extract structured report from ingestion report!', e);
return null;
}
};

export const getStructuredReport = (result: Partial<ExecutionRequestResult>): StructuredReport | null => {
Expand Down
7 changes: 3 additions & 4 deletions metadata-ingestion/src/datahub/cli/json_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,13 @@ def check_mce_file(filepath: str) -> str:
mce_source = GenericFileSource.create({"filename": filepath}, None)
for _ in mce_source.get_workunits():
pass
if mce_source.get_report().failures:
if len(mce_source.get_report().failures):
# raise the first failure found
logger.error(
f"Event file check failed with errors. Raising first error found. Full report {mce_source.get_report().as_string()}"
)
for failure_list in mce_source.get_report().failures.values():
if len(failure_list):
raise Exception(failure_list[0])
for failure in mce_source.get_report().failures:
raise Exception(failure.context)
raise Exception(
f"Failed to process file due to {mce_source.get_report().failures}"
)
Expand Down
Loading
Loading