Skip to content

Commit

Permalink
Merge pull request #404 from JupiterOne/1991-index-metadata
Browse files Browse the repository at this point in the history
OPTIMIZATION: Support for omitting specific graph objects from file storage
  • Loading branch information
austinkelleher authored Dec 27, 2020
2 parents d51a70f + 0ac3d50 commit 8061b53
Show file tree
Hide file tree
Showing 12 changed files with 472 additions and 36 deletions.
6 changes: 3 additions & 3 deletions packages/cli/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@jupiterone/cli",
"version": "5.2.1",
"version": "5.3.0",
"description": "The JupiterOne cli",
"main": "dist/src/index.js",
"types": "dist/src/index.d.ts",
Expand All @@ -24,8 +24,8 @@
"test": "jest"
},
"dependencies": {
"@jupiterone/integration-sdk-core": "^5.2.1",
"@jupiterone/integration-sdk-runtime": "^5.2.1",
"@jupiterone/integration-sdk-core": "^5.3.0",
"@jupiterone/integration-sdk-runtime": "^5.3.0",
"@lifeomic/attempt": "^3.0.0",
"commander": "^5.0.0",
"globby": "^11.0.1",
Expand Down
6 changes: 3 additions & 3 deletions packages/integration-sdk-cli/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@jupiterone/integration-sdk-cli",
"version": "5.2.1",
"version": "5.3.0",
"description": "The SDK for developing JupiterOne integrations",
"main": "dist/src/index.js",
"types": "dist/src/index.d.ts",
Expand All @@ -22,7 +22,7 @@
"prepack": "yarn build:dist"
},
"dependencies": {
"@jupiterone/integration-sdk-runtime": "^5.2.1",
"@jupiterone/integration-sdk-runtime": "^5.3.0",
"commander": "^5.0.0",
"globby": "^11.0.0",
"lodash": "^4.17.19",
Expand All @@ -31,7 +31,7 @@
"vis": "^4.21.0-EOL"
},
"devDependencies": {
"@jupiterone/integration-sdk-private-test-utils": "^5.2.1",
"@jupiterone/integration-sdk-private-test-utils": "^5.3.0",
"@pollyjs/adapter-node-http": "^4.0.4",
"@pollyjs/core": "^4.0.4",
"@pollyjs/persister-fs": "^4.0.4",
Expand Down
2 changes: 1 addition & 1 deletion packages/integration-sdk-core/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@jupiterone/integration-sdk-core",
"version": "5.2.1",
"version": "5.3.0",
"description": "The SDK for developing JupiterOne integrations",
"main": "dist/src/index.js",
"types": "dist/src/index.d.ts",
Expand Down
14 changes: 14 additions & 0 deletions packages/integration-sdk-core/src/types/step.ts
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,15 @@ export type IntegrationStep<
TConfig extends IntegrationInstanceConfig = IntegrationInstanceConfig
> = StepMetadata & Step<IntegrationStepExecutionContext<TConfig>>;

export interface GraphObjectIndexMetadata {
/**
* Whether the index of the graph object store is enabled or not. For example,
* in the case leveraging the `FileSystemGraphObjectStore`, this value
* determines whether we need to write the specific graph object to disk.
*/
enabled: boolean;
}

export interface StepGraphObjectMetadata {
_type: string;

Expand All @@ -88,6 +97,11 @@ export interface StepGraphObjectMetadata {
* * Ticket systems
*/
partial?: boolean;

/**
* Contains metadadata that can be leveraged inside of the graph object store
*/
indexMetadata?: GraphObjectIndexMetadata;
}

export interface StepEntityMetadata extends StepGraphObjectMetadata {
Expand Down
11 changes: 11 additions & 0 deletions packages/integration-sdk-core/src/types/storage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ import {
GraphObjectLookupKey,
} from './jobState';
import { Relationship } from './relationship';
import { GraphObjectIndexMetadata } from '../types/step';

export interface GetIndexMetadataForGraphObjectTypeParams {
stepId: string;
_type: string;
graphObjectCollectionType: 'entities' | 'relationships';
}

/**
* Persists entities and relationships to a durable medium for the duration of
Expand Down Expand Up @@ -39,4 +46,8 @@ export interface GraphObjectStore {
onEntitiesFlushed?: (entities: Entity[]) => Promise<void>,
onRelationshipsFlushed?: (relationships: Relationship[]) => Promise<void>,
): Promise<void>;

getIndexMetadataForGraphObjectType?: (
params: GetIndexMetadataForGraphObjectTypeParams,
) => GraphObjectIndexMetadata | undefined;
}
6 changes: 3 additions & 3 deletions packages/integration-sdk-dev-tools/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@jupiterone/integration-sdk-dev-tools",
"version": "5.2.1",
"version": "5.3.0",
"description": "A collection of developer tools that will assist with building integrations.",
"repository": "git@github.com:JupiterOne/sdk.git",
"author": "JupiterOne <dev@jupiterone.io>",
Expand All @@ -15,8 +15,8 @@
"access": "public"
},
"dependencies": {
"@jupiterone/integration-sdk-cli": "^5.2.1",
"@jupiterone/integration-sdk-testing": "^5.2.1",
"@jupiterone/integration-sdk-cli": "^5.3.0",
"@jupiterone/integration-sdk-testing": "^5.3.0",
"@types/jest": "^25.2.3",
"@types/node": "^14.0.5",
"@typescript-eslint/eslint-plugin": "^3.8.0",
Expand Down
4 changes: 2 additions & 2 deletions packages/integration-sdk-private-test-utils/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@jupiterone/integration-sdk-private-test-utils",
"private": true,
"version": "5.2.1",
"version": "5.3.0",
"description": "The SDK for developing JupiterOne integrations",
"main": "dist/index.js",
"types": "dist/index.d.ts",
Expand All @@ -12,7 +12,7 @@
"node": "10.x || 12.x || 14.x"
},
"dependencies": {
"@jupiterone/integration-sdk-core": "^5.2.1",
"@jupiterone/integration-sdk-core": "^5.3.0",
"lodash": "^4.17.15",
"uuid": "^7.0.3"
},
Expand Down
6 changes: 3 additions & 3 deletions packages/integration-sdk-runtime/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@jupiterone/integration-sdk-runtime",
"version": "5.2.1",
"version": "5.3.0",
"description": "The SDK for developing JupiterOne integrations",
"main": "dist/src/index.js",
"types": "dist/src/index.d.ts",
Expand All @@ -24,7 +24,7 @@
"prepack": "yarn build:dist"
},
"dependencies": {
"@jupiterone/integration-sdk-core": "^5.2.1",
"@jupiterone/integration-sdk-core": "^5.3.0",
"@lifeomic/alpha": "^1.1.3",
"@lifeomic/attempt": "^3.0.0",
"async-sema": "^3.1.0",
Expand All @@ -43,7 +43,7 @@
"uuid": "^7.0.3"
},
"devDependencies": {
"@jupiterone/integration-sdk-private-test-utils": "^5.2.1",
"@jupiterone/integration-sdk-private-test-utils": "^5.3.0",
"@types/uuid": "^7.0.2",
"get-port": "^5.1.1",
"memfs": "^3.2.0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ import {
IntegrationMissingKeyError,
Relationship,
GraphObjectStore,
GraphObjectIndexMetadata,
GetIndexMetadataForGraphObjectTypeParams,
IntegrationStep,
} from '@jupiterone/integration-sdk-core';

import { flushDataToDisk } from './flushDataToDisk';
Expand All @@ -26,6 +29,8 @@ export const DEFAULT_GRAPH_OBJECT_BUFFER_THRESHOLD = 500;
const BINARY_SEMAPHORE_CONCURRENCY = 1;

export interface FileSystemGraphObjectStoreParams {
integrationSteps?: IntegrationStep[];

/**
* The maximum number of graph objects that this store can buffer in memory
* before writing to disk. Machines with more memory should consider bumping
Expand All @@ -41,26 +46,88 @@ export interface FileSystemGraphObjectStoreParams {
prettifyFiles?: boolean;
}

interface GraphObjectIndexMetadataMap {
/**
* Map of _type to GraphObjectIndexMetadata
*/
entities: Map<string, GraphObjectIndexMetadata>;
/**
* Map of _type to GraphObjectIndexMetadata
*/
relationships: Map<string, GraphObjectIndexMetadata>;
}

/**
* TODO: Write this comment to explain why the thing is the way it is
*/
function integrationStepsToGraphObjectIndexMetadataMap(
integrationSteps: IntegrationStep[],
): Map<string, GraphObjectIndexMetadataMap> {
const stepIdToGraphObjectIndexMetadataMap = new Map<
string,
GraphObjectIndexMetadataMap
>();

for (const step of integrationSteps) {
const metadataMap: GraphObjectIndexMetadataMap = {
entities: new Map(),
relationships: new Map(),
};

for (const entityMetadata of step.entities) {
if (entityMetadata.indexMetadata) {
metadataMap.entities.set(
entityMetadata._type,
entityMetadata.indexMetadata,
);
}
}

for (const relationshipMetadata of step.relationships) {
if (relationshipMetadata.indexMetadata) {
metadataMap.relationships.set(
relationshipMetadata._type,
relationshipMetadata.indexMetadata,
);
}
}

stepIdToGraphObjectIndexMetadataMap.set(step.id, metadataMap);
}

return stepIdToGraphObjectIndexMetadataMap;
}

export class FileSystemGraphObjectStore implements GraphObjectStore {
private readonly semaphore: Sema;
private readonly localGraphObjectStore = new InMemoryGraphObjectStore();
private readonly graphObjectBufferThreshold: number;
private readonly prettifyFiles: boolean;
private readonly stepIdToGraphObjectIndexMetadataMap: Map<
string,
GraphObjectIndexMetadataMap
>;

constructor(params?: FileSystemGraphObjectStoreParams) {
this.semaphore = new Sema(BINARY_SEMAPHORE_CONCURRENCY);
this.graphObjectBufferThreshold =
params?.graphObjectBufferThreshold ||
DEFAULT_GRAPH_OBJECT_BUFFER_THRESHOLD;
this.prettifyFiles = params?.prettifyFiles || false;

if (params?.integrationSteps) {
this.stepIdToGraphObjectIndexMetadataMap = integrationStepsToGraphObjectIndexMetadataMap(
params.integrationSteps,
);
}
}

async addEntities(
storageDirectoryPath: string,
stepId: string,
newEntities: Entity[],
onEntitiesFlushed?: (entities: Entity[]) => Promise<void>,
) {
this.localGraphObjectStore.addEntities(storageDirectoryPath, newEntities);
this.localGraphObjectStore.addEntities(stepId, newEntities);

if (
this.localGraphObjectStore.getTotalEntityItemCount() >=
Expand All @@ -71,14 +138,11 @@ export class FileSystemGraphObjectStore implements GraphObjectStore {
}

async addRelationships(
storageDirectoryPath: string,
stepId: string,
newRelationships: Relationship[],
onRelationshipsFlushed?: (relationships: Relationship[]) => Promise<void>,
) {
this.localGraphObjectStore.addRelationships(
storageDirectoryPath,
newRelationships,
);
this.localGraphObjectStore.addRelationships(stepId, newRelationships);

if (
this.localGraphObjectStore.getTotalRelationshipItemCount() >=
Expand Down Expand Up @@ -160,13 +224,29 @@ export class FileSystemGraphObjectStore implements GraphObjectStore {
pMap(
this.localGraphObjectStore.collectEntitiesByStep(),
async ([stepId, entities]) => {
await flushDataToDisk({
storageDirectoryPath: stepId,
collectionType: 'entities',
data: entities,
pretty: this.prettifyFiles,
const indexable = entities.filter((e) => {
const indexMetadata = this.getIndexMetadataForGraphObjectType({
stepId,
_type: e._type,
graphObjectCollectionType: 'entities',
});

if (typeof indexMetadata === 'undefined') {
return true;
}

return indexMetadata.enabled === true;
});

if (indexable.length) {
await flushDataToDisk({
storageDirectoryPath: stepId,
collectionType: 'entities',
data: indexable,
pretty: this.prettifyFiles,
});
}

this.localGraphObjectStore.flushEntities(entities);

if (onEntitiesFlushed) {
Expand All @@ -184,13 +264,29 @@ export class FileSystemGraphObjectStore implements GraphObjectStore {
pMap(
this.localGraphObjectStore.collectRelationshipsByStep(),
async ([stepId, relationships]) => {
await flushDataToDisk({
storageDirectoryPath: stepId,
collectionType: 'relationships',
data: relationships,
pretty: this.prettifyFiles,
const indexable = relationships.filter((r) => {
const indexMetadata = this.getIndexMetadataForGraphObjectType({
stepId,
_type: r._type,
graphObjectCollectionType: 'relationships',
});

if (typeof indexMetadata === 'undefined') {
return true;
}

return indexMetadata.enabled === true;
});

if (indexable.length) {
await flushDataToDisk({
storageDirectoryPath: stepId,
collectionType: 'relationships',
data: indexable,
pretty: this.prettifyFiles,
});
}

this.localGraphObjectStore.flushRelationships(relationships);

if (onRelationshipsFlushed) {
Expand All @@ -201,6 +297,21 @@ export class FileSystemGraphObjectStore implements GraphObjectStore {
);
}

getIndexMetadataForGraphObjectType({
stepId,
_type,
graphObjectCollectionType,
}: GetIndexMetadataForGraphObjectTypeParams):
| GraphObjectIndexMetadata
| undefined {
if (!this.stepIdToGraphObjectIndexMetadataMap) {
return undefined;
}

const map = this.stepIdToGraphObjectIndexMetadataMap.get(stepId);
return map && map[graphObjectCollectionType].get(_type);
}

/**
* This function is ensures that only one input operation can
* happen at a time by utilizing a binary semaphore (lock/unlock).
Expand Down
Loading

0 comments on commit 8061b53

Please sign in to comment.