From 4943b347fa72b385cda84efa325d72d74eff4d4a Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 18 Sep 2024 22:39:58 +0200 Subject: [PATCH 01/41] wip: query format definition --- src/queries/base-query-format.ts | 4 ++ .../call-context-query-format.ts | 41 +++++++++++++++++++ src/queries/query.ts | 7 ++++ 3 files changed, 52 insertions(+) create mode 100644 src/queries/base-query-format.ts create mode 100644 src/queries/call-context-query/call-context-query-format.ts create mode 100644 src/queries/query.ts diff --git a/src/queries/base-query-format.ts b/src/queries/base-query-format.ts new file mode 100644 index 0000000000..6365337582 --- /dev/null +++ b/src/queries/base-query-format.ts @@ -0,0 +1,4 @@ +export interface BaseQueryFormat { + /** used to select the query type :) */ + readonly type: string; +} diff --git a/src/queries/call-context-query/call-context-query-format.ts b/src/queries/call-context-query/call-context-query-format.ts new file mode 100644 index 0000000000..52be90e92d --- /dev/null +++ b/src/queries/call-context-query/call-context-query-format.ts @@ -0,0 +1,41 @@ +import type { BaseQueryFormat } from '../base-query-format'; + +export const enum CallTargets { + /** call targets a function that is not defined locally (e.g., the call targets a library function) */ + Global = 'global', + /** call targets a function that is defined locally */ + Local = 'local', + /** call targets a function that is defined locally or globally */ + Any = 'any' +} + +export interface DefaultCallContextQueryFormat extends BaseQueryFormat { + readonly type: 'call-context'; + /** Regex regarding the function name */ + readonly callName: RegExp; + /** kind may be a step or anything that you attach to the call, this can be used to group calls together (e.g., linking `ggplot` to `visualize`) */ + readonly kind: string; + /** subkinds are used to uniquely identify the respective call type when grouping the output (e.g., the normalized name, linking `ggplot` to `plot`) 
*/ + readonly subkind: string; + /** call targets the function may have. This defaults to {@link CallTargets#Any}. */ + readonly callTargets?: CallTargets; +} + +interface LinkToLastCall extends BaseQueryFormat { + readonly type: 'link-to-last-call'; + /** Regex regarding the function name of the last call */ + readonly callName?: RegExp; + /** kind that this should be linked to (i.e., last call of the given kind) */ + readonly kind?: string; + /** subkind that this should be linked to (i.e., last call of the given subkind) */ + readonly subkind?: string; +} + +type LinkTo = LinkToLastCall; + +interface SubCallContextQueryFormat extends DefaultCallContextQueryFormat { + readonly linkTo: LinkTo; +} + + +export type CallContextQueryFormat = DefaultCallContextQueryFormat | SubCallContextQueryFormat; diff --git a/src/queries/query.ts b/src/queries/query.ts new file mode 100644 index 0000000000..4ea5054d1b --- /dev/null +++ b/src/queries/query.ts @@ -0,0 +1,7 @@ +import type { CallContextQueryFormat } from './call-context-query/call-context-query-format'; + +export type Query = CallContextQueryFormat; +export type QueryFormats = Query[]; + +/* TODO: generic query dispatcher */ +/* TODO: then execute the query and return the generic results */ From 617f338f4a302e695e680e149aa9acb5153e080f Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 20 Sep 2024 10:35:44 +0200 Subject: [PATCH 02/41] wip: queries --- src/dataflow/graph/vertex.ts | 1 + src/queries/base-query-format.ts | 5 +++++ .../call-context-query/call-context-query-executor.ts | 6 ++++++ .../call-context-query/call-context-query-format.ts | 5 ++++- src/queries/query.ts | 9 ++++++++- 5 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 src/queries/call-context-query/call-context-query-executor.ts diff --git a/src/dataflow/graph/vertex.ts b/src/dataflow/graph/vertex.ts index c8e2a090b2..24e33ee618 100644 --- a/src/dataflow/graph/vertex.ts +++ b/src/dataflow/graph/vertex.ts @@ -86,6 +86,7 @@ 
export interface DataflowGraphVertexFunctionDefinition extends DataflowGraphVert /** * The static subflow of the function definition, constructed within {@link processFunctionDefinition}. * If the vertex is (for example) a function, it can have a subgraph which is used as a template for each call. + * This is the `body` of the function. */ subflow: DataflowFunctionFlowInformation /** diff --git a/src/queries/base-query-format.ts b/src/queries/base-query-format.ts index 6365337582..1065cbd966 100644 --- a/src/queries/base-query-format.ts +++ b/src/queries/base-query-format.ts @@ -2,3 +2,8 @@ export interface BaseQueryFormat { /** used to select the query type :) */ readonly type: string; } + +/* TODO: type result? */ +export interface BaseQueryResult { + readonly queryType: Query['type']; +} diff --git a/src/queries/call-context-query/call-context-query-executor.ts b/src/queries/call-context-query/call-context-query-executor.ts new file mode 100644 index 0000000000..e54de9e090 --- /dev/null +++ b/src/queries/call-context-query/call-context-query-executor.ts @@ -0,0 +1,6 @@ +import type { DataflowGraph } from '../../dataflow/graph/graph'; +import type { CallContextQueryFormat } from './call-context-query-format'; + +export function executeCallContextQuery(graph: DataflowGraph, query: CallContextQueryFormat): void { + +} diff --git a/src/queries/call-context-query/call-context-query-format.ts b/src/queries/call-context-query/call-context-query-format.ts index 52be90e92d..e830ed6e3c 100644 --- a/src/queries/call-context-query/call-context-query-format.ts +++ b/src/queries/call-context-query/call-context-query-format.ts @@ -1,4 +1,4 @@ -import type { BaseQueryFormat } from '../base-query-format'; +import type { BaseQueryFormat, BaseQueryResult } from '../base-query-format'; export const enum CallTargets { /** call targets a function that is not defined locally (e.g., the call targets a library function) */ @@ -37,5 +37,8 @@ interface SubCallContextQueryFormat extends 
DefaultCallContextQueryFormat { readonly linkTo: LinkTo; } +export interface CallContextQueryResult extends BaseQueryResult { + /* TODO: continue */ +} export type CallContextQueryFormat = DefaultCallContextQueryFormat | SubCallContextQueryFormat; diff --git a/src/queries/query.ts b/src/queries/query.ts index 4ea5054d1b..e416258956 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -1,7 +1,14 @@ import type { CallContextQueryFormat } from './call-context-query/call-context-query-format'; +import type { DataflowGraph } from '../dataflow/graph/graph'; export type Query = CallContextQueryFormat; -export type QueryFormats = Query[]; +export type Queries = Query[]; /* TODO: generic query dispatcher */ /* TODO: then execute the query and return the generic results */ + + + +export function executeQuery(graph: DataflowGraph, query: Query): void { + +} From afb550eb96411c5cff1a043ecb5376f407f767b3 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 20 Sep 2024 11:54:01 +0200 Subject: [PATCH 03/41] wip: query typing infrastructure --- src/queries/base-query-format.ts | 1 - .../call-context-query-executor.ts | 7 ++--- .../call-context-query-format.ts | 13 ++++++++- src/queries/query.ts | 27 ++++++++++++++++--- 4 files changed, 40 insertions(+), 8 deletions(-) diff --git a/src/queries/base-query-format.ts b/src/queries/base-query-format.ts index 1065cbd966..f0c8d9ca22 100644 --- a/src/queries/base-query-format.ts +++ b/src/queries/base-query-format.ts @@ -3,7 +3,6 @@ export interface BaseQueryFormat { readonly type: string; } -/* TODO: type result? 
*/ export interface BaseQueryResult { readonly queryType: Query['type']; } diff --git a/src/queries/call-context-query/call-context-query-executor.ts b/src/queries/call-context-query/call-context-query-executor.ts index e54de9e090..5a8e8946a0 100644 --- a/src/queries/call-context-query/call-context-query-executor.ts +++ b/src/queries/call-context-query/call-context-query-executor.ts @@ -1,6 +1,7 @@ import type { DataflowGraph } from '../../dataflow/graph/graph'; -import type { CallContextQueryFormat } from './call-context-query-format'; - -export function executeCallContextQuery(graph: DataflowGraph, query: CallContextQueryFormat): void { +import type { CallContextQueryFormat , CallContextQueryResult } from './call-context-query-format'; +export function executeCallContextQuery(graph: DataflowGraph, query: CallContextQueryFormat): CallContextQueryResult { + console.log('hey'); + return null as unknown as CallContextQueryResult; } diff --git a/src/queries/call-context-query/call-context-query-format.ts b/src/queries/call-context-query/call-context-query-format.ts index e830ed6e3c..20ac686b45 100644 --- a/src/queries/call-context-query/call-context-query-format.ts +++ b/src/queries/call-context-query/call-context-query-format.ts @@ -1,4 +1,5 @@ import type { BaseQueryFormat, BaseQueryResult } from '../base-query-format'; +import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id'; export const enum CallTargets { /** call targets a function that is not defined locally (e.g., the call targets a library function) */ @@ -37,8 +38,18 @@ interface SubCallContextQueryFormat extends DefaultCallContextQueryFormat { readonly linkTo: LinkTo; } + +interface CallContextQuerySubKindResult { + readonly callName: string; + readonly id: NodeId; + /* ids attached by the linkTo query */ + readonly linkedIds: readonly NodeId[]; +} + export interface CallContextQueryResult extends BaseQueryResult { - /* TODO: continue */ + readonly kind: string; + /** maps each 
subkind to the results found */ + readonly subkinds: Readonly> } export type CallContextQueryFormat = DefaultCallContextQueryFormat | SubCallContextQueryFormat; diff --git a/src/queries/query.ts b/src/queries/query.ts index e416258956..2f66003008 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -1,14 +1,35 @@ import type { CallContextQueryFormat } from './call-context-query/call-context-query-format'; import type { DataflowGraph } from '../dataflow/graph/graph'; +import type { BaseQueryFormat, BaseQueryResult } from './base-query-format'; +import { executeCallContextQuery } from './call-context-query/call-context-query-executor'; +import { guard } from '../util/assert'; export type Query = CallContextQueryFormat; export type Queries = Query[]; -/* TODO: generic query dispatcher */ -/* TODO: then execute the query and return the generic results */ +type QueryWithType = Query & { type: QueryType }; +export type QueryExecutor> = (graph: DataflowGraph, query: Query) => Result; -export function executeQuery(graph: DataflowGraph, query: Query): void { +type SupportedQueries = { + [QueryType in Query['type']]: QueryExecutor, BaseQueryResult>> +} + +export const SupportedQueries = { + 'call-context': executeCallContextQuery +} as const satisfies SupportedQueries; + +export type QueryResult = ReturnType; + + +export function executeQuery(graph: DataflowGraph, query: SpecificQuery): QueryResult { + const executor = SupportedQueries[query.type]; + guard(executor !== undefined, `Unsupported query type: ${query.type}`); + return executor(graph, query); +} +export function executeQueries(graph: DataflowGraph, queries: Queries): BaseQueryResult[] { + return queries.map(query => executeQuery(graph, query)); + /** TODO: test instrumentation */ } From 61f1da5d317b559718eb3329ec5336b7205e446d Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 20 Sep 2024 15:56:13 +0200 Subject: [PATCH 04/41] wip: multi-stage query resolve --- .../call-context-query-executor.ts | 
21 ++++++-- .../call-context-query-format.ts | 29 ++++++----- src/queries/query.ts | 43 ++++++++++----- test/functionality/dataflow/dataflow.spec.ts | 4 ++ .../query/call-context-query-tests.ts | 52 +++++++++++++++++++ test/functionality/util/objects-tests.ts | 4 +- 6 files changed, 123 insertions(+), 30 deletions(-) create mode 100644 test/functionality/dataflow/query/call-context-query-tests.ts diff --git a/src/queries/call-context-query/call-context-query-executor.ts b/src/queries/call-context-query/call-context-query-executor.ts index 5a8e8946a0..86d4a2c440 100644 --- a/src/queries/call-context-query/call-context-query-executor.ts +++ b/src/queries/call-context-query/call-context-query-executor.ts @@ -1,7 +1,20 @@ import type { DataflowGraph } from '../../dataflow/graph/graph'; -import type { CallContextQueryFormat , CallContextQueryResult } from './call-context-query-format'; +import type { CallContextQuery , CallContextQueryResult } from './call-context-query-format'; -export function executeCallContextQuery(graph: DataflowGraph, query: CallContextQueryFormat): CallContextQueryResult { - console.log('hey'); - return null as unknown as CallContextQueryResult; +/* TODO: Group all names etc. together to traverse only once */ + +/** + * Multi-stage call context query resolve. + * + * 1. Resolve all calls in the DF graph that match the respective {@link DefaultCallContextQueryFormat#callName} regex. + * This includes any function calls to be collected for 'linkTo' resolutions. + * 2. Identify their respective call targets, if {@link DefaultCallContextQueryFormat#callTargets} is set to be non-any. + * 3. Attach `linkTo` calls to the respective calls. 
+ */ +export function executeCallContextQueries(graph: DataflowGraph, queries: readonly CallContextQuery[]): CallContextQueryResult { + + return { + queryType: 'call-context', + kinds: {} + }; } diff --git a/src/queries/call-context-query/call-context-query-format.ts b/src/queries/call-context-query/call-context-query-format.ts index 20ac686b45..9c0760941f 100644 --- a/src/queries/call-context-query/call-context-query-format.ts +++ b/src/queries/call-context-query/call-context-query-format.ts @@ -22,14 +22,16 @@ export interface DefaultCallContextQueryFormat extends BaseQueryFormat { readonly callTargets?: CallTargets; } +/** + * Links the current call to the last call of the given kind. + * This way, you can link a call like `points` to the latest graphics plot etc. + * Please note that this may still result in a standalone, unlinked result + * if we are unable to find a call of the given kind. + */ interface LinkToLastCall extends BaseQueryFormat { readonly type: 'link-to-last-call'; /** Regex regarding the function name of the last call */ readonly callName?: RegExp; - /** kind that this should be linked to (i.e., last call of the given kind) */ - readonly kind?: string; - /** subkind that this should be linked to (i.e., last call of the given subkind) */ - readonly subkind?: string; } type LinkTo = LinkToLastCall; @@ -40,16 +42,19 @@ interface SubCallContextQueryFormat extends DefaultCallContextQueryFormat { interface CallContextQuerySubKindResult { - readonly callName: string; - readonly id: NodeId; + readonly callName: string; + readonly id: NodeId; + /* ids of functions which are called by the respective function call */ + readonly calls?: readonly NodeId[]; /* ids attached by the linkTo query */ - readonly linkedIds: readonly NodeId[]; + readonly linkedIds?: readonly NodeId[]; } -export interface CallContextQueryResult extends BaseQueryResult { - readonly kind: string; - /** maps each subkind to the results found */ - readonly subkinds: Readonly> +export 
interface CallContextQueryResult extends BaseQueryResult { + readonly kinds: Record + }> } -export type CallContextQueryFormat = DefaultCallContextQueryFormat | SubCallContextQueryFormat; +export type CallContextQuery = DefaultCallContextQueryFormat | SubCallContextQueryFormat; diff --git a/src/queries/query.ts b/src/queries/query.ts index 2f66003008..ee88cb9d15 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -1,15 +1,16 @@ -import type { CallContextQueryFormat } from './call-context-query/call-context-query-format'; +import type { CallContextQuery } from './call-context-query/call-context-query-format'; import type { DataflowGraph } from '../dataflow/graph/graph'; import type { BaseQueryFormat, BaseQueryResult } from './base-query-format'; -import { executeCallContextQuery } from './call-context-query/call-context-query-executor'; +import { executeCallContextQueries } from './call-context-query/call-context-query-executor'; import { guard } from '../util/assert'; -export type Query = CallContextQueryFormat; +export type Query = CallContextQuery; export type Queries = Query[]; type QueryWithType = Query & { type: QueryType }; -export type QueryExecutor> = (graph: DataflowGraph, query: Query) => Result; +/* Each executor receives all queries of its type in case it wants to avoid repeated traversal */ +export type QueryExecutor> = (graph: DataflowGraph, query: Query[]) => Result; type SupportedQueries = { @@ -17,19 +18,37 @@ type SupportedQueries = { } export const SupportedQueries = { - 'call-context': executeCallContextQuery + 'call-context': executeCallContextQueries } as const satisfies SupportedQueries; -export type QueryResult = ReturnType; +export type SupportedQueryTypes = keyof typeof SupportedQueries; +export type QueryResult = ReturnType; +export function executeQueriesOfSameType(graph: DataflowGraph, ...queries: SpecificQuery[]): QueryResult { + guard(queries.length > 0, 'At least one query must be provided'); + /* every query must have 
the same type */ + guard(queries.every(q => q.type === queries[0].type), 'All queries must have the same type'); + const executor = SupportedQueries[queries[0].type]; + guard(executor !== undefined, `Unsupported query type: ${queries[0].type}`); + return executor(graph, queries) as QueryResult; +} -export function executeQuery(graph: DataflowGraph, query: SpecificQuery): QueryResult { - const executor = SupportedQueries[query.type]; - guard(executor !== undefined, `Unsupported query type: ${query.type}`); - return executor(graph, query); +function groupQueriesByType(queries: Queries): Record { + const grouped: Record = {} as Record; + for(const query of queries) { + if(grouped[query.type] === undefined) { + grouped[query.type] = []; + } + grouped[query.type].push(query); + } + return grouped; } -export function executeQueries(graph: DataflowGraph, queries: Queries): BaseQueryResult[] { - return queries.map(query => executeQuery(graph, query)); +export function executeQueries(graph: DataflowGraph, queries: readonly [QueryWithType]): [QueryResult] +export function executeQueries(graph: DataflowGraph, queries: readonly QueryWithType[]): QueryResult[] +/* TODO: map query result to query type involved */ +export function executeQueries(graph: DataflowGraph, queries: readonly QueryWithType[]): QueryResult[] { + const grouped = groupQueriesByType(queries); + return queries.map(query => executeQueriesOfSameType(graph, query)); /** TODO: test instrumentation */ } diff --git a/test/functionality/dataflow/dataflow.spec.ts b/test/functionality/dataflow/dataflow.spec.ts index 819937449b..f343da6e82 100644 --- a/test/functionality/dataflow/dataflow.spec.ts +++ b/test/functionality/dataflow/dataflow.spec.ts @@ -10,5 +10,9 @@ describe('Dataflow', () => { requireAllTestsInFolder(path.join(__dirname, 'graph')) ); + describe('Query', () => + requireAllTestsInFolder(path.join(__dirname, 'query')) + ); + require('./processing-of-elements/processing-of-elements'); }); diff --git 
a/test/functionality/dataflow/query/call-context-query-tests.ts b/test/functionality/dataflow/query/call-context-query-tests.ts new file mode 100644 index 0000000000..a02006856d --- /dev/null +++ b/test/functionality/dataflow/query/call-context-query-tests.ts @@ -0,0 +1,52 @@ +import type { + CallContextQuery, + CallContextQueryResult +} from '../../../../src/queries/call-context-query/call-context-query-format'; +import { PipelineExecutor } from '../../../../src/core/pipeline-executor'; +import type { RShell } from '../../../../src/r-bridge/shell'; +import { DEFAULT_DATAFLOW_PIPELINE } from '../../../../src/core/steps/pipeline/default-pipelines'; +import { requestFromInput } from '../../../../src/r-bridge/retriever'; +import { deterministicCountingIdGenerator } from '../../../../src/r-bridge/lang-4.x/ast/model/processing/decorate'; +import { executeQueries } from '../../../../src/queries/query'; +import { withShell } from '../../_helper/shell'; +import { assert } from 'chai'; +import { BuiltIn } from '../../../../src/dataflow/environments/built-in'; + +function test(name: string, shell: RShell, code: string, query: CallContextQuery, expected: CallContextQueryResult) { + /* TODO: labels */ + it(name, async() => { + const info = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { + shell, + request: requestFromInput(code), + getId: deterministicCountingIdGenerator(0) + }).allRemainingSteps(); + + const graph = info.dataflow.graph; + const [result] = executeQueries(graph, [query]); + /* expect them to be deeply equal */ + assert.deepStrictEqual(result, expected, 'The result of the call context query does not match the expected result'); + }); +} + +describe('Call Context Query', withShell(shell => { + /* TODO: merge results for higher queries so that all subkinds etc. 
are joined */ + test('Print calls', shell, 'print(1)', { + type: 'call-context', + callName: /print/, + kind: 'visualize', + subkind: 'print' + }, { + queryType: 'call-context', + kinds: { + 'visualize': { + subkinds: { + 'print': [{ + callName: 'print', + calls: [BuiltIn], + id: 0 + }] + } + } + } + }); +})); diff --git a/test/functionality/util/objects-tests.ts b/test/functionality/util/objects-tests.ts index 223a6cfcd4..6fc505de5c 100644 --- a/test/functionality/util/objects-tests.ts +++ b/test/functionality/util/objects-tests.ts @@ -6,12 +6,12 @@ describe('Objects', () => { describe('isObjectOrArray', () => { const positive = (a: unknown, msg: string): void => { it(msg, () => { - assert.isTrue(isObjectOrArray(a)); + assert.isTrue(isObjectOrArray(a)); }); }; const negative = (a: unknown, msg: string): void => { it(msg, () => { - assert.isFalse(isObjectOrArray(a), `${msg} is not considered an object`); + assert.isFalse(isObjectOrArray(a), `${msg} is not considered an object`); }); }; From 25fe8ced980e7fc27d969dff8b3a4273f51aa031 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 20 Sep 2024 21:47:47 +0200 Subject: [PATCH 05/41] wip: basic queries :) --- .../call-context-query-executor.ts | 130 +++++++++++++++++- .../call-context-query-format.ts | 23 +++- src/queries/query.ts | 20 +-- .../query/call-context-query-tests.ts | 14 +- 4 files changed, 161 insertions(+), 26 deletions(-) diff --git a/src/queries/call-context-query/call-context-query-executor.ts b/src/queries/call-context-query/call-context-query-executor.ts index 86d4a2c440..ef02481b7c 100644 --- a/src/queries/call-context-query/call-context-query-executor.ts +++ b/src/queries/call-context-query/call-context-query-executor.ts @@ -1,20 +1,142 @@ import type { DataflowGraph } from '../../dataflow/graph/graph'; -import type { CallContextQuery , CallContextQueryResult } from './call-context-query-format'; +import type { + CallContextQuery, + CallContextQueryKindResult, + CallContextQueryResult } from 
'./call-context-query-format'; +import { + CallTargets +} from './call-context-query-format'; +import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id'; +import { VertexType } from '../../dataflow/graph/vertex'; +import { assertUnreachable } from '../../util/assert'; +import { edgeIncludesType, EdgeType } from '../../dataflow/graph/edge'; +import type { DeepWritable } from 'ts-essentials'; -/* TODO: Group all names etc. together to traverse only once */ +class TwoLayerCollector { + readonly store = new Map>(); + + public add(layer1: Layer1, layer2: Layer2, value: Values) { + let layer2Map = this.store.get(layer1); + if(layer2Map === undefined) { + layer2Map = new Map(); + this.store.set(layer1, layer2Map); + } + let values = layer2Map.get(layer2); + if(values === undefined) { + values = []; + layer2Map.set(layer2, values); + } + values.push(value); + } + + public get(layer1: Layer1, layer2: Layer2): Values[] | undefined { + return this.store.get(layer1)?.get(layer2); + } + + public outerKeys(): Iterable { + return this.store.keys(); + } + + public innerKeys(layer1: Layer1): Iterable { + return this.store.get(layer1)?.keys() ?? []; + } + + public asciiSummary() { + let result = ''; + for(const [layer1, layer2Map] of this.store) { + result += `${JSON.stringify(layer1)}\n`; + for(const [layer2, values] of layer2Map) { + result += ` ╰ ${JSON.stringify(layer2)}: ${JSON.stringify(values)}\n`; + } + } + return result; + } +} + +function satisfiesCallTargets(id: NodeId, graph: DataflowGraph, callTarget: CallTargets): NodeId[] | false { + const callVertex = graph.get(id); + if(callVertex === undefined) { + return false; + } + const [,outgoing] = callVertex; + const callTargets: NodeId[] = [...outgoing] + .filter(([, e]) => edgeIncludesType(e.types, EdgeType.Calls)) + .map(([t]) => t) + ; + + switch(callTarget) { + case CallTargets.Any: + return callTargets; + case CallTargets.Global: + return callTargets.length === 0 ? 
callTargets : false; + case CallTargets.Local: + return callTargets.length > 0 ? callTargets : false; + default: + assertUnreachable(callTarget); + } +} + +function makeReport(collector: TwoLayerCollector): CallContextQueryKindResult { + const result: CallContextQueryKindResult = {} as unknown as CallContextQueryKindResult; + for(const [kind, collected] of collector.store) { + const subkinds = {} as DeepWritable; + for(const [subkind, values] of collected) { + subkinds[subkind] ??= []; + const collectIn = subkinds[subkind]; + for(const value of values) { + const [id, name, calls] = value; + collectIn.push({ + callName: name, + id, + calls + }); + } + } + result[kind] = { + subkinds + }; + } + return result; +} /** * Multi-stage call context query resolve. * * 1. Resolve all calls in the DF graph that match the respective {@link DefaultCallContextQueryFormat#callName} regex. - * This includes any function calls to be collected for 'linkTo' resolutions. * 2. Identify their respective call targets, if {@link DefaultCallContextQueryFormat#callTargets} is set to be non-any. + * This happens during the main resolution! * 3. Attach `linkTo` calls to the respective calls. 
*/ export function executeCallContextQueries(graph: DataflowGraph, queries: readonly CallContextQuery[]): CallContextQueryResult { + /* the node id, name, and call targets if present */ + const initialIdCollector = new TwoLayerCollector(); + + for(const [node, info] of graph.vertices(true)) { + if(info.tag !== VertexType.FunctionCall) { + continue; + } + for(const query of queries.filter(q => q.callName.test(info.name))) { + let targets: NodeId[] | false = false; + if(query.callTargets) { + targets = satisfiesCallTargets(node, graph, query.callTargets); + if(targets === false) { + continue; + } + } + if(targets === false) { + initialIdCollector.add(query.kind, query.subkind, [node, info.name]); + } else { + initialIdCollector.add(query.kind, query.subkind, [node, info.name, targets]); + } + } + } + + /* TODO: link to */ + console.log(initialIdCollector.asciiSummary()); return { queryType: 'call-context', - kinds: {} + kinds: makeReport(initialIdCollector) }; } + diff --git a/src/queries/call-context-query/call-context-query-format.ts b/src/queries/call-context-query/call-context-query-format.ts index 9c0760941f..ae8b22e2e7 100644 --- a/src/queries/call-context-query/call-context-query-format.ts +++ b/src/queries/call-context-query/call-context-query-format.ts @@ -18,7 +18,10 @@ export interface DefaultCallContextQueryFormat extends BaseQueryFormat { readonly kind: string; /** subkinds are used to uniquely identify the respective call type when grouping the output (e.g., the normalized name, linking `ggplot` to `plot`) */ readonly subkind: string; - /** call targets the function may have. This defaults to {@link CallTargets#Any}. */ + /** + * Call targets the function may have. This defaults to {@link CallTargets#Any}. + * Request this specifically to gain all call targets we can resolve. 
+ */ readonly callTargets?: CallTargets; } @@ -44,17 +47,23 @@ interface SubCallContextQueryFormat extends DefaultCallContextQueryFormat { interface CallContextQuerySubKindResult { readonly callName: string; readonly id: NodeId; - /* ids of functions which are called by the respective function call */ + /** + * Ids of functions which are called by the respective function call, + * this will only be populated whenever you explicitly state the {@link DefaultCallContextQueryFormat#callTargets}. + * An empty array means that the call targets only non-local functions. + */ readonly calls?: readonly NodeId[]; - /* ids attached by the linkTo query */ + /** ids attached by the linkTo query */ readonly linkedIds?: readonly NodeId[]; } +export type CallContextQueryKindResult = Record +}> + export interface CallContextQueryResult extends BaseQueryResult { - readonly kinds: Record - }> + readonly kinds: CallContextQueryKindResult; } export type CallContextQuery = DefaultCallContextQueryFormat | SubCallContextQueryFormat; diff --git a/src/queries/query.ts b/src/queries/query.ts index ee88cb9d15..1495e9d256 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -5,7 +5,6 @@ import { executeCallContextQueries } from './call-context-query/call-context-que import { guard } from '../util/assert'; export type Query = CallContextQuery; -export type Queries = Query[]; type QueryWithType = Query & { type: QueryType }; @@ -33,7 +32,7 @@ export function executeQueriesOfSameType(graph: Dat return executor(graph, queries) as QueryResult; } -function groupQueriesByType(queries: Queries): Record { +function groupQueriesByType(queries: readonly QueryWithType[]): Record { const grouped: Record = {} as Record; for(const query of queries) { if(grouped[query.type] === undefined) { @@ -44,11 +43,16 @@ function groupQueriesByType(queries: Queries): Record { return grouped; } -export function executeQueries(graph: DataflowGraph, queries: readonly [QueryWithType]): [QueryResult] -export 
function executeQueries(graph: DataflowGraph, queries: readonly QueryWithType[]): QueryResult[] -/* TODO: map query result to query type involved */ -export function executeQueries(graph: DataflowGraph, queries: readonly QueryWithType[]): QueryResult[] { +/* a record mapping the query type present to its respective result */ +export type QueriesResult = { + [QueryType in Base]: QueryResult +} + +export function executeQueries(graph: DataflowGraph, queries: readonly QueryWithType[]): QueriesResult { const grouped = groupQueriesByType(queries); - return queries.map(query => executeQueriesOfSameType(graph, query)); - /** TODO: test instrumentation */ + const results: QueriesResult = {} as QueriesResult; + for(const type of Object.keys(grouped) as Base[]) { + results[type] = executeQueriesOfSameType(graph, ...grouped[type]) as QueryResult; + } + return results; } diff --git a/test/functionality/dataflow/query/call-context-query-tests.ts b/test/functionality/dataflow/query/call-context-query-tests.ts index a02006856d..fd8f22e873 100644 --- a/test/functionality/dataflow/query/call-context-query-tests.ts +++ b/test/functionality/dataflow/query/call-context-query-tests.ts @@ -1,7 +1,8 @@ import type { CallContextQuery, - CallContextQueryResult -} from '../../../../src/queries/call-context-query/call-context-query-format'; + CallContextQueryResult } from '../../../../src/queries/call-context-query/call-context-query-format'; + + import { PipelineExecutor } from '../../../../src/core/pipeline-executor'; import type { RShell } from '../../../../src/r-bridge/shell'; import { DEFAULT_DATAFLOW_PIPELINE } from '../../../../src/core/steps/pipeline/default-pipelines'; @@ -10,7 +11,6 @@ import { deterministicCountingIdGenerator } from '../../../../src/r-bridge/lang- import { executeQueries } from '../../../../src/queries/query'; import { withShell } from '../../_helper/shell'; import { assert } from 'chai'; -import { BuiltIn } from '../../../../src/dataflow/environments/built-in'; 
function test(name: string, shell: RShell, code: string, query: CallContextQuery, expected: CallContextQueryResult) { /* TODO: labels */ @@ -22,28 +22,28 @@ function test(name: string, shell: RShell, code: string, query: CallContextQuery }).allRemainingSteps(); const graph = info.dataflow.graph; - const [result] = executeQueries(graph, [query]); + const { 'call-context': result } = executeQueries(graph, [query]); /* expect them to be deeply equal */ assert.deepStrictEqual(result, expected, 'The result of the call context query does not match the expected result'); }); } describe('Call Context Query', withShell(shell => { - /* TODO: merge results for higher queries so that all subkinds etc. are joined */ test('Print calls', shell, 'print(1)', { type: 'call-context', callName: /print/, kind: 'visualize', subkind: 'print' }, { + /** TODO: change to type */ queryType: 'call-context', kinds: { 'visualize': { subkinds: { 'print': [{ + /** TODO: show callName only if differs | there is a real regex */ callName: 'print', - calls: [BuiltIn], - id: 0 + id: 3 }] } } From 73e5d1000533c95dd1d7ea61e2dbb14952aef947 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 21 Sep 2024 11:48:50 +0200 Subject: [PATCH 06/41] wip: virtual query experimentation --- src/queries/base-query-format.ts | 3 +- .../call-context-query-executor.ts | 21 ++++++------- .../call-context-query-format.ts | 5 ++- src/queries/query.ts | 22 ++++++++++--- src/queries/virtual-query/compound-query.ts | 24 ++++++++++++++ src/queries/virtual-query/virtual-queries.ts | 19 ++++++++++++ .../query/call-context-query-tests.ts | 31 ++++++++++--------- 7 files changed, 92 insertions(+), 33 deletions(-) create mode 100644 src/queries/virtual-query/compound-query.ts create mode 100644 src/queries/virtual-query/virtual-queries.ts diff --git a/src/queries/base-query-format.ts b/src/queries/base-query-format.ts index f0c8d9ca22..427dcdb37d 100644 --- a/src/queries/base-query-format.ts +++ 
b/src/queries/base-query-format.ts @@ -4,5 +4,6 @@ export interface BaseQueryFormat { } export interface BaseQueryResult { - readonly queryType: Query['type']; + /** type of the query which produced this result */ + readonly type: Query['type']; } diff --git a/src/queries/call-context-query/call-context-query-executor.ts b/src/queries/call-context-query/call-context-query-executor.ts index ef02481b7c..850bd7e6b5 100644 --- a/src/queries/call-context-query/call-context-query-executor.ts +++ b/src/queries/call-context-query/call-context-query-executor.ts @@ -67,16 +67,16 @@ function satisfiesCallTargets(id: NodeId, graph: DataflowGraph, callTarget: Call switch(callTarget) { case CallTargets.Any: return callTargets; - case CallTargets.Global: + case CallTargets.OnlyGlobal: return callTargets.length === 0 ? callTargets : false; - case CallTargets.Local: + case CallTargets.OnlyLocal: return callTargets.length > 0 ? callTargets : false; default: assertUnreachable(callTarget); } } -function makeReport(collector: TwoLayerCollector): CallContextQueryKindResult { +function makeReport(collector: TwoLayerCollector): CallContextQueryKindResult { const result: CallContextQueryKindResult = {} as unknown as CallContextQueryKindResult; for(const [kind, collected] of collector.store) { const subkinds = {} as DeepWritable; @@ -84,9 +84,8 @@ function makeReport(collector: TwoLayerCollector(); + /* the node id and call targets if present */ + const initialIdCollector = new TwoLayerCollector(); for(const [node, info] of graph.vertices(true)) { if(info.tag !== VertexType.FunctionCall) { @@ -124,9 +123,9 @@ export function executeCallContextQueries(graph: DataflowGraph, queries: readonl } } if(targets === false) { - initialIdCollector.add(query.kind, query.subkind, [node, info.name]); + initialIdCollector.add(query.kind, query.subkind, [node]); } else { - initialIdCollector.add(query.kind, query.subkind, [node, info.name, targets]); + initialIdCollector.add(query.kind, query.subkind, 
[node, targets]); } } } @@ -135,8 +134,8 @@ export function executeCallContextQueries(graph: DataflowGraph, queries: readonl console.log(initialIdCollector.asciiSummary()); return { - queryType: 'call-context', - kinds: makeReport(initialIdCollector) + type: 'call-context', + kinds: makeReport(initialIdCollector) }; } diff --git a/src/queries/call-context-query/call-context-query-format.ts b/src/queries/call-context-query/call-context-query-format.ts index ae8b22e2e7..57cdd7c1a9 100644 --- a/src/queries/call-context-query/call-context-query-format.ts +++ b/src/queries/call-context-query/call-context-query-format.ts @@ -3,9 +3,9 @@ import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-i export const enum CallTargets { /** call targets a function that is not defined locally (e.g., the call targets a library function) */ - Global = 'global', + OnlyGlobal = 'global', /** call targets a function that is defined locally */ - Local = 'local', + OnlyLocal = 'local', /** call targets a function that is defined locally or globally */ Any = 'any' } @@ -45,7 +45,6 @@ interface SubCallContextQueryFormat extends DefaultCallContextQueryFormat { interface CallContextQuerySubKindResult { - readonly callName: string; readonly id: NodeId; /** * Ids of functions which are called by the respective function call, diff --git a/src/queries/query.ts b/src/queries/query.ts index 1495e9d256..555a291275 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -3,10 +3,12 @@ import type { DataflowGraph } from '../dataflow/graph/graph'; import type { BaseQueryFormat, BaseQueryResult } from './base-query-format'; import { executeCallContextQueries } from './call-context-query/call-context-query-executor'; import { guard } from '../util/assert'; +import type { VirtualQuery } from './virtual-query/virtual-queries'; +import { SupportedVirtualQueries } from './virtual-query/virtual-queries'; export type Query = CallContextQuery; -type QueryWithType = Query & { type: 
QueryType }; +export type QueryWithType = Query & { type: QueryType }; /* Each executor receives all queries of its type in case it wants to avoid repeated traversal */ export type QueryExecutor> = (graph: DataflowGraph, query: Query[]) => Result; @@ -20,6 +22,7 @@ export const SupportedQueries = { 'call-context': executeCallContextQueries } as const satisfies SupportedQueries; + export type SupportedQueryTypes = keyof typeof SupportedQueries; export type QueryResult = ReturnType; @@ -32,14 +35,25 @@ export function executeQueriesOfSameType(graph: Dat return executor(graph, queries) as QueryResult; } -function groupQueriesByType(queries: readonly QueryWithType[]): Record { +function groupQueriesByType(queries: readonly (QueryWithType | VirtualQuery)[]): Record { const grouped: Record = {} as Record; - for(const query of queries) { + function addQuery(query: Query) { if(grouped[query.type] === undefined) { grouped[query.type] = []; } grouped[query.type].push(query); } + for(const query of queries) { + const virtualQuery = SupportedVirtualQueries[query.type as keyof typeof SupportedVirtualQueries]; + if(virtualQuery !== undefined) { + const subQueries = virtualQuery(query as VirtualQuery); + for(const subQuery of subQueries) { + addQuery(subQuery); + } + } else { + addQuery(query as Query); + } + } return grouped; } @@ -48,7 +62,7 @@ export type QueriesResult = { [QueryType in Base]: QueryResult } -export function executeQueries(graph: DataflowGraph, queries: readonly QueryWithType[]): QueriesResult { +export function executeQueries(graph: DataflowGraph, queries: readonly (QueryWithType | VirtualQuery)[]): QueriesResult { const grouped = groupQueriesByType(queries); const results: QueriesResult = {} as QueriesResult; for(const type of Object.keys(grouped) as Base[]) { diff --git a/src/queries/virtual-query/compound-query.ts b/src/queries/virtual-query/compound-query.ts new file mode 100644 index 0000000000..095571d8ce --- /dev/null +++ 
b/src/queries/virtual-query/compound-query.ts @@ -0,0 +1,24 @@ +import type { QueryWithType, SupportedQueryTypes } from '../query'; +import type { BaseQueryFormat } from '../base-query-format'; + +/** + * Virtual Query Format. + * Grouping query parameters of the same type (re-specified in the `query` field). + */ +export interface CompoundQueryFormat> extends BaseQueryFormat { + readonly type: 'compound'; + readonly query: SubQueryType; + /** you do not have to re-state the type, this is automatically filled with 'query' */ + readonly arguments: readonly Omit, 'type'>[]; +} + +export function executeCompoundQueries>(query: CompoundQueryFormat): QueryWithType[] { + const results: QueryWithType[] = []; + for(const arg of query.arguments) { + results.push({ + type: query.query, + ...arg + } as QueryWithType); + } + return results; +} diff --git a/src/queries/virtual-query/virtual-queries.ts b/src/queries/virtual-query/virtual-queries.ts new file mode 100644 index 0000000000..44eee52030 --- /dev/null +++ b/src/queries/virtual-query/virtual-queries.ts @@ -0,0 +1,19 @@ +import type { QueryWithType, SupportedQueryTypes } from '../query'; +import type { CompoundQueryFormat } from './compound-query'; +import { executeCompoundQueries } from './compound-query'; +import type { BaseQueryFormat } from '../base-query-format'; + +/** A query that does not perform a search but may perform (e.g., convenience) modifications of other queries */ +export type VirtualQuery = CompoundQueryFormat; + + +/* Each executor receives all queries of its type in case it wants to avoid repeated traversal */ +export type VirtualQueryExecutor = (query: Query) => Result; + +type SupportedVirtualQueries = { + [QueryType in VirtualQuery['type']]: VirtualQueryExecutor, BaseQueryFormat[]> +} + +export const SupportedVirtualQueries = { + 'compound': executeCompoundQueries +} as const satisfies SupportedVirtualQueries; diff --git a/test/functionality/dataflow/query/call-context-query-tests.ts 
b/test/functionality/dataflow/query/call-context-query-tests.ts index fd8f22e873..fe4b03c906 100644 --- a/test/functionality/dataflow/query/call-context-query-tests.ts +++ b/test/functionality/dataflow/query/call-context-query-tests.ts @@ -1,6 +1,9 @@ import type { CallContextQuery, CallContextQueryResult } from '../../../../src/queries/call-context-query/call-context-query-format'; +import { + CallTargets +} from '../../../../src/queries/call-context-query/call-context-query-format'; import { PipelineExecutor } from '../../../../src/core/pipeline-executor'; @@ -12,7 +15,7 @@ import { executeQueries } from '../../../../src/queries/query'; import { withShell } from '../../_helper/shell'; import { assert } from 'chai'; -function test(name: string, shell: RShell, code: string, query: CallContextQuery, expected: CallContextQueryResult) { +function test(name: string, shell: RShell, code: string, queries: readonly CallContextQuery[], expected: CallContextQueryResult) { /* TODO: labels */ it(name, async() => { const info = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { @@ -22,28 +25,28 @@ function test(name: string, shell: RShell, code: string, query: CallContextQuery }).allRemainingSteps(); const graph = info.dataflow.graph; - const { 'call-context': result } = executeQueries(graph, [query]); + const { 'call-context': result } = executeQueries(graph, queries); /* expect them to be deeply equal */ assert.deepStrictEqual(result, expected, 'The result of the call context query does not match the expected result'); }); } +/** TODO: check what happens if builtin if may be override */ describe('Call Context Query', withShell(shell => { - test('Print calls', shell, 'print(1)', { - type: 'call-context', - callName: /print/, - kind: 'visualize', - subkind: 'print' - }, { - /** TODO: change to type */ - queryType: 'call-context', - kinds: { + test('Print calls', shell, 'print(1)', [{ + type: 'call-context', + callName: /print/, + kind: 'visualize', + subkind: 'print', + 
callTargets: CallTargets.OnlyGlobal + }], { + type: 'call-context', + kinds: { 'visualize': { subkinds: { 'print': [{ - /** TODO: show callName only if differs | there is a real regex */ - callName: 'print', - id: 3 + id: 3, + calls: [] }] } } From 53ad324ec97c900025fc56e0ddc2921ae136b2f3 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 21 Sep 2024 14:47:06 +0200 Subject: [PATCH 07/41] feat: compound queries --- src/queries/base-query-format.ts | 9 ++- .../call-context-query-executor.ts | 6 +- .../call-context-query-format.ts | 2 +- src/queries/query.ts | 47 ++++++++---- src/queries/virtual-query/compound-query.ts | 45 +++++++++--- src/queries/virtual-query/virtual-queries.ts | 11 +-- test/functionality/_helper/label.ts | 14 ++-- test/functionality/_helper/query.ts | 72 +++++++++++++++++++ .../query/call-context-query-tests.ts | 65 ++++++----------- .../dataflow/query/compound-query-tests.ts | 35 +++++++++ 10 files changed, 225 insertions(+), 81 deletions(-) create mode 100644 test/functionality/_helper/query.ts create mode 100644 test/functionality/dataflow/query/compound-query-tests.ts diff --git a/src/queries/base-query-format.ts b/src/queries/base-query-format.ts index 427dcdb37d..b351810cf4 100644 --- a/src/queries/base-query-format.ts +++ b/src/queries/base-query-format.ts @@ -3,7 +3,10 @@ export interface BaseQueryFormat { readonly type: string; } -export interface BaseQueryResult { - /** type of the query which produced this result */ - readonly type: Query['type']; +export interface BaseQueryMeta { + /** Duration in milliseconds */ + readonly timing: number; +} +export interface BaseQueryResult { + readonly '.meta': BaseQueryMeta; } diff --git a/src/queries/call-context-query/call-context-query-executor.ts b/src/queries/call-context-query/call-context-query-executor.ts index 850bd7e6b5..e8da5131f3 100644 --- a/src/queries/call-context-query/call-context-query-executor.ts +++ b/src/queries/call-context-query/call-context-query-executor.ts @@ -107,6 
+107,8 @@ function makeReport(collector: TwoLayerCollector(); @@ -134,7 +136,9 @@ export function executeCallContextQueries(graph: DataflowGraph, queries: readonl console.log(initialIdCollector.asciiSummary()); return { - type: 'call-context', + '.meta': { + timing: Date.now() - now + }, kinds: makeReport(initialIdCollector) }; } diff --git a/src/queries/call-context-query/call-context-query-format.ts b/src/queries/call-context-query/call-context-query-format.ts index 57cdd7c1a9..389e373495 100644 --- a/src/queries/call-context-query/call-context-query-format.ts +++ b/src/queries/call-context-query/call-context-query-format.ts @@ -61,7 +61,7 @@ export type CallContextQueryKindResult = Record }> -export interface CallContextQueryResult extends BaseQueryResult { +export interface CallContextQueryResult extends BaseQueryResult { readonly kinds: CallContextQueryKindResult; } diff --git a/src/queries/query.ts b/src/queries/query.ts index 555a291275..9a2af22958 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -3,19 +3,21 @@ import type { DataflowGraph } from '../dataflow/graph/graph'; import type { BaseQueryFormat, BaseQueryResult } from './base-query-format'; import { executeCallContextQueries } from './call-context-query/call-context-query-executor'; import { guard } from '../util/assert'; -import type { VirtualQuery } from './virtual-query/virtual-queries'; +import type { VirtualQueryArgumentsWithType } from './virtual-query/virtual-queries'; import { SupportedVirtualQueries } from './virtual-query/virtual-queries'; +import type { Writable } from 'ts-essentials'; +import type { VirtualCompoundConstraint } from './virtual-query/compound-query'; export type Query = CallContextQuery; -export type QueryWithType = Query & { type: QueryType }; +export type QueryArgumentsWithType = Query & { type: QueryType }; /* Each executor receives all queries of its type in case it wants to avoid repeated traversal */ -export type QueryExecutor> = (graph: DataflowGraph, 
query: Query[]) => Result; +export type QueryExecutor = (graph: DataflowGraph, query: readonly Query[]) => Result; type SupportedQueries = { - [QueryType in Query['type']]: QueryExecutor, BaseQueryResult>> + [QueryType in Query['type']]: QueryExecutor, BaseQueryResult> } export const SupportedQueries = { @@ -35,7 +37,17 @@ export function executeQueriesOfSameType(graph: Dat return executor(graph, queries) as QueryResult; } -function groupQueriesByType(queries: readonly (QueryWithType | VirtualQuery)[]): Record { +function isVirtualQuery< + Base extends SupportedQueryTypes, + VirtualArguments extends VirtualCompoundConstraint = VirtualCompoundConstraint +>(query: QueryArgumentsWithType | VirtualQueryArgumentsWithType): query is VirtualQueryArgumentsWithType { + return SupportedVirtualQueries[query.type as keyof typeof SupportedVirtualQueries] !== undefined; +} + +function groupQueriesByType< + Base extends SupportedQueryTypes, + VirtualArguments extends VirtualCompoundConstraint = VirtualCompoundConstraint +>(queries: readonly (QueryArgumentsWithType | VirtualQueryArgumentsWithType)[]): Record { const grouped: Record = {} as Record; function addQuery(query: Query) { if(grouped[query.type] === undefined) { @@ -44,9 +56,9 @@ function groupQueriesByType(queries: readonly grouped[query.type].push(query); } for(const query of queries) { - const virtualQuery = SupportedVirtualQueries[query.type as keyof typeof SupportedVirtualQueries]; - if(virtualQuery !== undefined) { - const subQueries = virtualQuery(query as VirtualQuery); + if(isVirtualQuery(query)) { + const executor = SupportedVirtualQueries[query.type]; + const subQueries = executor(query); for(const subQuery of subQueries) { addQuery(subQuery); } @@ -59,14 +71,21 @@ function groupQueriesByType(queries: readonly /* a record mapping the query type present to its respective result */ export type QueriesResult = { - [QueryType in Base]: QueryResult -} + readonly [QueryType in Base]: QueryResult +} & BaseQueryResult 
-export function executeQueries(graph: DataflowGraph, queries: readonly (QueryWithType | VirtualQuery)[]): QueriesResult { +export function executeQueries< + Base extends SupportedQueryTypes, + VirtualArguments extends VirtualCompoundConstraint = VirtualCompoundConstraint +>(graph: DataflowGraph, queries: readonly (QueryArgumentsWithType | VirtualQueryArgumentsWithType)[]): QueriesResult { + const now = Date.now(); const grouped = groupQueriesByType(queries); - const results: QueriesResult = {} as QueriesResult; + const results = {} as Writable>; for(const type of Object.keys(grouped) as Base[]) { - results[type] = executeQueriesOfSameType(graph, ...grouped[type]) as QueryResult; + results[type] = executeQueriesOfSameType(graph, ...grouped[type]) as QueriesResult[Base]; } - return results; + results['.meta'] = { + timing: Date.now() - now + }; + return results as QueriesResult; } diff --git a/src/queries/virtual-query/compound-query.ts b/src/queries/virtual-query/compound-query.ts index 095571d8ce..f939de4b64 100644 --- a/src/queries/virtual-query/compound-query.ts +++ b/src/queries/virtual-query/compound-query.ts @@ -1,24 +1,51 @@ -import type { QueryWithType, SupportedQueryTypes } from '../query'; +import type { QueryArgumentsWithType, SupportedQueryTypes } from '../query'; import type { BaseQueryFormat } from '../base-query-format'; +import type { StrictOmit } from 'ts-essentials'; + +/** @see CompoundQueryFormat */ +export type VirtualCompoundConstraint = keyof StrictOmit, 'type'> /** * Virtual Query Format. * Grouping query parameters of the same type (re-specified in the `query` field). 
*/ -export interface CompoundQueryFormat> extends BaseQueryFormat { - readonly type: 'compound'; - readonly query: SubQueryType; - /** you do not have to re-state the type, this is automatically filled with 'query' */ - readonly arguments: readonly Omit, 'type'>[]; +export interface CompoundQueryFormat< + /* The queries we collect, locked by `query` demanding a single type */ + SubQueryType extends SupportedQueryTypes, + /* arguments we give which are common for all queries */ + CommonArguments extends VirtualCompoundConstraint +> extends BaseQueryFormat { + readonly type: 'compound'; + readonly query: SubQueryType; + /** defaults to use the same arguments for all queries */ + readonly commonArguments: Pick, CommonArguments>; + /** + * You do not have to re-state the type, this is automatically filled with the type for 'query' + * Additionally all arguments given in `commonArguments` are now enforced optional. + */ + readonly arguments: ReadonlyArray, 'type'>, + CommonArguments /* we cannot use mark optional / some-partial constructions here as it trips up the type checker, hence we work around by union inclusion */ + > & Partial>>; } -export function executeCompoundQueries>(query: CompoundQueryFormat): QueryWithType[] { - const results: QueryWithType[] = []; +/** + * Execute a single, virtual compound query in terms of unfolding the contained queries. 
+ */ +export function executeCompoundQueries< + SubQueryType extends SupportedQueryTypes, + CommonArguments extends VirtualCompoundConstraint = VirtualCompoundConstraint +>( + query: CompoundQueryFormat +): QueryArgumentsWithType[] { + const results: QueryArgumentsWithType[] = []; for(const arg of query.arguments) { results.push({ type: query.query, + ...query.commonArguments, ...arg - } as QueryWithType); + } as unknown as QueryArgumentsWithType); } return results; } + diff --git a/src/queries/virtual-query/virtual-queries.ts b/src/queries/virtual-query/virtual-queries.ts index 44eee52030..0ddeac28d4 100644 --- a/src/queries/virtual-query/virtual-queries.ts +++ b/src/queries/virtual-query/virtual-queries.ts @@ -1,17 +1,20 @@ -import type { QueryWithType, SupportedQueryTypes } from '../query'; -import type { CompoundQueryFormat } from './compound-query'; +import type { QueryArgumentsWithType, SupportedQueryTypes } from '../query'; +import type { CompoundQueryFormat, VirtualCompoundConstraint } from './compound-query'; import { executeCompoundQueries } from './compound-query'; import type { BaseQueryFormat } from '../base-query-format'; /** A query that does not perform a search but may perform (e.g., convenience) modifications of other queries */ -export type VirtualQuery = CompoundQueryFormat; +export type VirtualQueryArgumentsWithType< + Base extends SupportedQueryTypes, + VirtualArguments extends VirtualCompoundConstraint = VirtualCompoundConstraint +> = CompoundQueryFormat; /* Each executor receives all queries of its type in case it wants to avoid repeated traversal */ export type VirtualQueryExecutor = (query: Query) => Result; type SupportedVirtualQueries = { - [QueryType in VirtualQuery['type']]: VirtualQueryExecutor, BaseQueryFormat[]> + [QueryType in VirtualQueryArgumentsWithType['type']]: VirtualQueryExecutor, BaseQueryFormat[]> } export const SupportedVirtualQueries = { diff --git a/test/functionality/_helper/label.ts 
b/test/functionality/_helper/label.ts index 8d25841c68..400a029d81 100644 --- a/test/functionality/_helper/label.ts +++ b/test/functionality/_helper/label.ts @@ -20,7 +20,7 @@ const uniqueTestId = (() => { })(); -const TestLabelContexts = ['parse', 'desugar', 'dataflow', 'other', 'slice', 'output', 'lineage'] as const; +const TestLabelContexts = ['parse', 'desugar', 'dataflow', 'other', 'slice', 'output', 'lineage', 'query'] as const; export type TestLabelContext = typeof TestLabelContexts[number] export interface TestLabel extends MergeableRecord { @@ -40,8 +40,8 @@ export interface TestLabel extends MergeableRecord { * @param context - the context in which the test is run, if not given this returns the label information for a test-helper to attach it */ export function label(testname: string, ids: readonly SupportedFlowrCapabilityId[], context: readonly TestLabelContext[]): string -export function label(testname: string, ids: readonly SupportedFlowrCapabilityId[], context?: readonly TestLabelContext[]): TestLabel -export function label(testname: string, ids: readonly SupportedFlowrCapabilityId[], context?: readonly TestLabelContext[]): TestLabel | string { +export function label(testname: string, ids?: readonly SupportedFlowrCapabilityId[], context?: readonly TestLabelContext[]): TestLabel +export function label(testname: string, ids?: readonly SupportedFlowrCapabilityId[], context?: readonly TestLabelContext[]): TestLabel | string { const capabilities: Set = new Set(ids); const label: TestLabel = { id: uniqueTestId(), @@ -50,8 +50,12 @@ export function label(testname: string, ids: readonly SupportedFlowrCapabilityId context: context === undefined ? 
new Set() : new Set(context) }; - for(const i of capabilities) { - TheGlobalLabelMap.get(i).push(label); + if(capabilities.size > 0) { + for(const i of capabilities) { + TheGlobalLabelMap.get(i).push(label); + } + } else { + TheGlobalLabelMap.get('.').push(label); } if(context === undefined) { diff --git a/test/functionality/_helper/query.ts b/test/functionality/_helper/query.ts new file mode 100644 index 0000000000..1ffc057cac --- /dev/null +++ b/test/functionality/_helper/query.ts @@ -0,0 +1,72 @@ +import type { RShell } from '../../../src/r-bridge/shell'; + + +import { PipelineExecutor } from '../../../src/core/pipeline-executor'; +import { DEFAULT_DATAFLOW_PIPELINE } from '../../../src/core/steps/pipeline/default-pipelines'; +import { requestFromInput } from '../../../src/r-bridge/retriever'; +import { deterministicCountingIdGenerator } from '../../../src/r-bridge/lang-4.x/ast/model/processing/decorate'; +import type { QueriesResult, Query } from '../../../src/queries/query'; +import { executeQueries } from '../../../src/queries/query'; +import { assert } from 'chai'; +import type { VirtualQueryArgumentsWithType } from '../../../src/queries/virtual-query/virtual-queries'; +import type { TestLabel } from './label'; +import { decorateLabelContext } from './label'; +import type { VirtualCompoundConstraint } from '../../../src/queries/virtual-query/compound-query'; + +type OmitFromValues = { + [P in keyof T]: Omit +} + +function normalizeResults(result: QueriesResult): OmitFromValues, '.meta'>, '.meta'> { + const normalized = {} as OmitFromValues, '.meta'>, '.meta'>; + for(const key of Object.keys(result) as (keyof QueriesResult)[]) { + if(key === '.meta') { + continue; + } + const normalizedChild = {} as Omit, '.meta'>[typeof key]; + for(const childKey of Object.keys(result[key]) as (keyof QueriesResult[typeof key])[]) { + if(childKey === '.meta') { + continue; + } + normalizedChild[childKey] = result[key][childKey]; + } + normalized[key] = normalizedChild; + } + 
return normalized; +} + +/** + * Asserts the result of a query + * + * @param name - Name of the test case to generate + * @param shell - R Shell Session to use + * @param code - R code to execute the query on + * @param queries - Queries to execute + * @param expected - Expected result of the queries (without attached meta-information like timing) + */ +export function assertQuery< + Queries extends Query, + VirtualArguments extends VirtualCompoundConstraint = VirtualCompoundConstraint +>(name: string | TestLabel, shell: RShell, code: string, queries: readonly (Queries | VirtualQueryArgumentsWithType)[], expected: + OmitFromValues, '.meta'>, '.meta'> +) { + const effectiveName = decorateLabelContext(name, ['query']); + + it(effectiveName, async() => { + const info = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { + shell, + request: requestFromInput(code), + getId: deterministicCountingIdGenerator(0) + }).allRemainingSteps(); + + const graph = info.dataflow.graph; + const result = executeQueries(graph, queries); + + // TODO: demote to logger + console.log(`total query time: ${result['.meta'].timing.toFixed(1)}ms (~1ms accuracy)`); + const normalized = normalizeResults(result); + + /* expect them to be deeply equal */ + assert.deepStrictEqual(normalized, expected, 'The result of the call context query does not match the expected result'); + }); +} diff --git a/test/functionality/dataflow/query/call-context-query-tests.ts b/test/functionality/dataflow/query/call-context-query-tests.ts index fe4b03c906..f523cb9788 100644 --- a/test/functionality/dataflow/query/call-context-query-tests.ts +++ b/test/functionality/dataflow/query/call-context-query-tests.ts @@ -1,55 +1,32 @@ -import type { - CallContextQuery, - CallContextQueryResult } from '../../../../src/queries/call-context-query/call-context-query-format'; import { CallTargets } from '../../../../src/queries/call-context-query/call-context-query-format'; - - -import { PipelineExecutor } from 
'../../../../src/core/pipeline-executor'; -import type { RShell } from '../../../../src/r-bridge/shell'; -import { DEFAULT_DATAFLOW_PIPELINE } from '../../../../src/core/steps/pipeline/default-pipelines'; -import { requestFromInput } from '../../../../src/r-bridge/retriever'; -import { deterministicCountingIdGenerator } from '../../../../src/r-bridge/lang-4.x/ast/model/processing/decorate'; -import { executeQueries } from '../../../../src/queries/query'; import { withShell } from '../../_helper/shell'; -import { assert } from 'chai'; - -function test(name: string, shell: RShell, code: string, queries: readonly CallContextQuery[], expected: CallContextQueryResult) { - /* TODO: labels */ - it(name, async() => { - const info = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { - shell, - request: requestFromInput(code), - getId: deterministicCountingIdGenerator(0) - }).allRemainingSteps(); +import { assertQuery } from '../../_helper/query'; +import { label } from '../../_helper/label'; - const graph = info.dataflow.graph; - const { 'call-context': result } = executeQueries(graph, queries); - /* expect them to be deeply equal */ - assert.deepStrictEqual(result, expected, 'The result of the call context query does not match the expected result'); - }); -} /** TODO: check what happens if builtin if may be override */ describe('Call Context Query', withShell(shell => { - test('Print calls', shell, 'print(1)', [{ - type: 'call-context', - callName: /print/, - kind: 'visualize', - subkind: 'print', - callTargets: CallTargets.OnlyGlobal - }], { - type: 'call-context', - kinds: { - 'visualize': { - subkinds: { - 'print': [{ - id: 3, - calls: [] - }] + assertQuery(label('Print calls'), + shell, 'print(1)', [{ + type: 'call-context', + callName: /print/, + kind: 'visualize', + subkind: 'print', + callTargets: CallTargets.OnlyGlobal + }], { + 'call-context': { + kinds: { + 'visualize': { + subkinds: { + 'print': [{ + id: 3, + calls: [] + }] + } + } } } - } - }); + }); })); 
diff --git a/test/functionality/dataflow/query/compound-query-tests.ts b/test/functionality/dataflow/query/compound-query-tests.ts new file mode 100644 index 0000000000..847601046e --- /dev/null +++ b/test/functionality/dataflow/query/compound-query-tests.ts @@ -0,0 +1,35 @@ + + +import { withShell } from '../../_helper/shell'; +import { assertQuery } from '../../_helper/query'; +import { label } from '../../_helper/label'; + + +describe('Compound Query', withShell(shell => { + assertQuery(label('Compound Virtual Query'), + shell, 'print(1); foo(2)', [{ + type: 'compound', + query: 'call-context', + commonArguments: { + kind: 'visualize', + subkind: 'print' + }, + arguments: [{ + callName: /print/, + }, { + callName: /foo/, + }] + }], { + 'call-context': { + kinds: { + 'visualize': { + subkinds: { + 'print': [{ + id: 3, + }] + } + } + } + } + }); +})); From 53c03e0412d056f20a7cf92214761321281ac8ed Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 21 Sep 2024 17:23:30 +0200 Subject: [PATCH 08/41] wip: working on built-in detection for queries --- src/dataflow/environments/clone.ts | 3 +- src/dataflow/environments/environment.ts | 5 +- src/dataflow/environments/resolve-by-name.ts | 18 ++- src/dataflow/graph/dataflowgraph-builder.ts | 3 + src/dataflow/graph/graph.ts | 9 +- .../functions/call/named-call-handling.ts | 6 + .../call-context-query-executor.ts | 44 +++++--- .../call-context-query-format.ts | 6 +- src/queries/query.ts | 17 ++- src/statistics/features/feature.ts | 2 +- .../supported/expression-list/post-process.ts | 2 +- ...-list.ts => statistics-expression-list.ts} | 0 test/functionality/_helper/label.ts | 6 +- test/functionality/_helper/query.ts | 19 ++-- test/functionality/benchmark/slicer.spec.ts | 4 +- .../query/call-context-query-tests.ts | 105 ++++++++++++++---- .../dataflow/query/compound-query-tests.ts | 4 +- test/functionality/lineage/lineage.spec.ts | 3 +- 18 files changed, 182 insertions(+), 74 deletions(-) rename 
src/statistics/features/supported/expression-list/{expression-list.ts => statistics-expression-list.ts} (100%) diff --git a/src/dataflow/environments/clone.ts b/src/dataflow/environments/clone.ts index 7b09a2f665..3bae7eb644 100644 --- a/src/dataflow/environments/clone.ts +++ b/src/dataflow/environments/clone.ts @@ -15,7 +15,8 @@ function cloneEnvironment(environment: IEnvironment | undefined, recurseParents: } else if(environment.id === BuiltInEnvironment.id) { return BuiltInEnvironment; } - const clone = new Environment(recurseParents ? cloneEnvironment(environment.parent, recurseParents) : environment.parent); + /* make sure the clone has the same id */ + const clone = new Environment(recurseParents ? cloneEnvironment(environment.parent, recurseParents) : environment.parent, environment.id); clone.memory = new Map(JSON.parse(JSON.stringify([...environment.memory])) as [Identifier, IdentifierDefinition[]][]); return clone; } diff --git a/src/dataflow/environments/environment.ts b/src/dataflow/environments/environment.ts index 51f0f03935..2c4869de86 100644 --- a/src/dataflow/environments/environment.ts +++ b/src/dataflow/environments/environment.ts @@ -60,12 +60,13 @@ export interface IEnvironment { let environmentIdCounter = 0; export class Environment implements IEnvironment { - readonly id = environmentIdCounter++; + readonly id; parent: IEnvironment; memory: Map; - constructor(parent: IEnvironment) { + constructor(parent: IEnvironment, id?: number) { this.parent = parent; + this.id = id ?? 
environmentIdCounter++; this.memory = new Map(); } } diff --git a/src/dataflow/environments/resolve-by-name.ts b/src/dataflow/environments/resolve-by-name.ts index 3f97e173cf..75a741230e 100644 --- a/src/dataflow/environments/resolve-by-name.ts +++ b/src/dataflow/environments/resolve-by-name.ts @@ -2,8 +2,10 @@ import type { IEnvironment, REnvironmentInformation } from './environment'; import { BuiltInEnvironment } from './environment'; import { Ternary } from '../../util/logic'; import type { Identifier, IdentifierDefinition } from './identifier'; +import { happensInEveryBranch } from '../info'; +// TODO: cache this! => promote environments to classes /** * Resolves a given identifier name to a list of its possible definition location using R scoping and resolving rules. * @@ -14,15 +16,27 @@ import type { Identifier, IdentifierDefinition } from './identifier'; */ export function resolveByName(name: Identifier, environment: REnvironmentInformation): IdentifierDefinition[] | undefined { let current: IEnvironment = environment.current; + let definitions: IdentifierDefinition[] | undefined = undefined; do{ const definition = current.memory.get(name); if(definition !== undefined) { - return definition; + /* TODO: guard for other control dependencies which are set? */ + if(definition.every(d => happensInEveryBranch(d.controlDependencies))) { + return definition; + } else { + definitions ??= []; + definitions.push(...definition); + } } current = current.parent; } while(current.id !== BuiltInEnvironment.id); - return current.memory.get(name); + const builtIns = current.memory.get(name); + if(definitions) { + return builtIns === undefined ? 
definitions : [...definitions, ...builtIns]; + } else { + return builtIns; + } } export function resolvesToBuiltInConstant(name: Identifier | undefined, environment: REnvironmentInformation, wantedValue: unknown): Ternary { diff --git a/src/dataflow/graph/dataflowgraph-builder.ts b/src/dataflow/graph/dataflowgraph-builder.ts index fe2ac4448e..77a8c57ced 100644 --- a/src/dataflow/graph/dataflowgraph-builder.ts +++ b/src/dataflow/graph/dataflowgraph-builder.ts @@ -96,6 +96,9 @@ export class DataflowGraphBuilder extends DataflowGraph { this.reads(id, call); } } + if(info?.onlyBuiltIn || onlyBuiltInAuto) { + this.calls(id, BuiltIn); + } return this; } diff --git a/src/dataflow/graph/graph.ts b/src/dataflow/graph/graph.ts index 8ba640dac2..de4d6817aa 100644 --- a/src/dataflow/graph/graph.ts +++ b/src/dataflow/graph/graph.ts @@ -21,7 +21,7 @@ import type { REnvironmentInformation } from '../environments/environment'; import { initializeCleanEnvironments } from '../environments/environment'; import type { AstIdMap } from '../../r-bridge/lang-4.x/ast/model/processing/decorate'; import { cloneEnvironmentInformation } from '../environments/clone'; -import { BuiltIn } from '../environments/built-in'; +import { jsonReplacer } from '../../util/json'; export type DataflowFunctionFlowInformation = Omit & { graph: Set } @@ -257,13 +257,12 @@ export class DataflowGraph< * Will insert a new edge into the graph, * if the direction of the edge is of no importance (`same-read-read` or `same-def-def`), source * and target will be sorted so that `from` has the lower, and `to` the higher id (default ordering). - * Please note that this will never make edges to {@link BuiltIn} as they are not part of the graph. 
*/ public addEdge(from: NodeId | ReferenceForEdge, to: NodeId | ReferenceForEdge, edgeInfo: EdgeData): this { const { fromId, toId } = extractEdgeIds(from, to); const { type, ...rest } = edgeInfo; - if(fromId === toId || toId === BuiltIn) { + if(fromId === toId) { return this; } @@ -410,8 +409,8 @@ export class DataflowGraph< } function mergeNodeInfos(current: Vertex, next: Vertex): Vertex { - guard(current.tag === next.tag, () => `nodes to be joined for the same id must have the same tag, but ${JSON.stringify(current)} vs ${JSON.stringify(next)}`); - guard(current.environment === next.environment, 'nodes to be joined for the same id must have the same environment'); + guard(current.tag === next.tag, () => `nodes to be joined for the same id must have the same tag, but ${JSON.stringify(current, jsonReplacer)} vs ${JSON.stringify(next, jsonReplacer)}`); + guard(current.environment?.current.id === next.environment?.current.id, () => `nodes to be joined for the same id must have the same environment, but not for: ${JSON.stringify(current, jsonReplacer)} vs ${JSON.stringify(next, jsonReplacer)}`); if(current.tag === 'variable-definition') { guard(current.scope === next.scope, 'nodes to be joined for the same id must have the same scope'); diff --git a/src/dataflow/internal/process/functions/call/named-call-handling.ts b/src/dataflow/internal/process/functions/call/named-call-handling.ts index 0dd8ed014f..1c395d701c 100644 --- a/src/dataflow/internal/process/functions/call/named-call-handling.ts +++ b/src/dataflow/internal/process/functions/call/named-call-handling.ts @@ -9,6 +9,7 @@ import type { RSymbol } from '../../../../../r-bridge/lang-4.x/ast/model/nodes/r import type { NodeId } from '../../../../../r-bridge/lang-4.x/ast/model/processing/node-id'; import { resolveByName } from '../../../../environments/resolve-by-name'; import { VertexType } from '../../../../graph/vertex'; +import { EdgeType } from '../../../../graph/edge'; function mergeInformation(info: 
DataflowInformation | undefined, newInfo: DataflowInformation): DataflowInformation { @@ -56,6 +57,11 @@ export function processNamedCall( if(resolvedFunction.kind === 'built-in-function') { builtIn = true; information = mergeInformation(information, resolvedFunction.processor(name, args, rootId, data)); + /* add the built-in resolve edge */ + const vert = information.graph.getVertex(rootId); + if(vert && vert.tag === VertexType.FunctionCall) { + information.graph.addEdge(rootId, resolvedFunction.nodeId, { type: EdgeType.Calls }); + } } else { defaultProcessor = true; } diff --git a/src/queries/call-context-query/call-context-query-executor.ts b/src/queries/call-context-query/call-context-query-executor.ts index e8da5131f3..7cf5f351a7 100644 --- a/src/queries/call-context-query/call-context-query-executor.ts +++ b/src/queries/call-context-query/call-context-query-executor.ts @@ -53,24 +53,27 @@ class TwoLayerCollector { } } -function satisfiesCallTargets(id: NodeId, graph: DataflowGraph, callTarget: CallTargets): NodeId[] | false { +function satisfiesCallTargets(id: NodeId, graph: DataflowGraph, callTarget: CallTargets): NodeId[] | 'no' { const callVertex = graph.get(id); if(callVertex === undefined) { - return false; + return 'no'; } const [,outgoing] = callVertex; - const callTargets: NodeId[] = [...outgoing] + const baseCallTargets = [...outgoing] .filter(([, e]) => edgeIncludesType(e.types, EdgeType.Calls)) - .map(([t]) => t) ; + console.log(baseCallTargets); + + const callTargets = baseCallTargets.map(([t]) => t); + switch(callTarget) { case CallTargets.Any: return callTargets; case CallTargets.OnlyGlobal: - return callTargets.length === 0 ? callTargets : false; + return callTargets.length === 0 ? callTargets : 'no'; case CallTargets.OnlyLocal: - return callTargets.length > 0 ? callTargets : false; + return callTargets.length > 0 ? 
callTargets : 'no'; default: assertUnreachable(callTarget); } @@ -85,10 +88,12 @@ function makeReport(collector: TwoLayerCollector(); + /* promote all strings to regex patterns */ + const promotedQueries = queries.map(q => ({ + ...q, + callName: new RegExp(q.callName) + })); + + for(const [node, info] of graph.vertices(true)) { if(info.tag !== VertexType.FunctionCall) { continue; } - for(const query of queries.filter(q => q.callName.test(info.name))) { - let targets: NodeId[] | false = false; + for(const query of promotedQueries.filter(q => q.callName.test(info.name))) { + let targets: NodeId[] | 'no' | undefined = undefined; if(query.callTargets) { targets = satisfiesCallTargets(node, graph, query.callTargets); - if(targets === false) { + if(targets === 'no') { continue; } } - if(targets === false) { - initialIdCollector.add(query.kind, query.subkind, [node]); - } else { + if(targets) { initialIdCollector.add(query.kind, query.subkind, [node, targets]); + } else { + initialIdCollector.add(query.kind, query.subkind, [node]); } } } diff --git a/src/queries/call-context-query/call-context-query-format.ts b/src/queries/call-context-query/call-context-query-format.ts index 389e373495..61dadca3bf 100644 --- a/src/queries/call-context-query/call-context-query-format.ts +++ b/src/queries/call-context-query/call-context-query-format.ts @@ -12,8 +12,8 @@ export const enum CallTargets { export interface DefaultCallContextQueryFormat extends BaseQueryFormat { readonly type: 'call-context'; - /** Regex regarding the function name */ - readonly callName: RegExp; + /** Regex regarding the function name, please note that strings will be interpreted as regular expressions too! 
*/ + readonly callName: RegExp | string; /** kind may be a step or anything that you attach to the call, this can be used to group calls together (e.g., linking `ggplot` to `visualize`) */ readonly kind: string; /** subkinds are used to uniquely identify the respective call type when grouping the output (e.g., the normalized name, linking `ggplot` to `plot`) */ @@ -44,7 +44,7 @@ interface SubCallContextQueryFormat extends DefaultCallContextQueryFormat { } -interface CallContextQuerySubKindResult { +export interface CallContextQuerySubKindResult { readonly id: NodeId; /** * Ids of functions which are called by the respective function call, diff --git a/src/queries/query.ts b/src/queries/query.ts index 9a2af22958..d659d8cbac 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -70,22 +70,29 @@ function groupQueriesByType< } /* a record mapping the query type present to its respective result */ -export type QueriesResult = { +export type QueryResults = { readonly [QueryType in Base]: QueryResult } & BaseQueryResult + +type OmitFromValues = { + [P in keyof T]: Omit +} + +export type QueryResultsWithoutMeta = OmitFromValues, '.meta'>, '.meta'>; + export function executeQueries< Base extends SupportedQueryTypes, VirtualArguments extends VirtualCompoundConstraint = VirtualCompoundConstraint ->(graph: DataflowGraph, queries: readonly (QueryArgumentsWithType | VirtualQueryArgumentsWithType)[]): QueriesResult { +>(graph: DataflowGraph, queries: readonly (QueryArgumentsWithType | VirtualQueryArgumentsWithType)[]): QueryResults { const now = Date.now(); const grouped = groupQueriesByType(queries); - const results = {} as Writable>; + const results = {} as Writable>; for(const type of Object.keys(grouped) as Base[]) { - results[type] = executeQueriesOfSameType(graph, ...grouped[type]) as QueriesResult[Base]; + results[type] = executeQueriesOfSameType(graph, ...grouped[type]) as QueryResults[Base]; } results['.meta'] = { timing: Date.now() - now }; - return results as 
QueriesResult; + return results as QueryResults; } diff --git a/src/statistics/features/feature.ts b/src/statistics/features/feature.ts index 4d0be120f4..5f64424c58 100644 --- a/src/statistics/features/feature.ts +++ b/src/statistics/features/feature.ts @@ -21,7 +21,7 @@ import { assignments } from './supported/assignments/assignments'; import { loops } from './supported/loops/loops'; import { controlflow } from './supported/control-flow/control-flow'; import { dataAccess } from './supported/data-access/data-access'; -import { expressionList } from './supported/expression-list/expression-list'; +import { expressionList } from './supported/expression-list/statistics-expression-list'; import { variables } from './supported/variables/variables'; import type { Document } from '@xmldom/xmldom'; diff --git a/src/statistics/features/supported/expression-list/post-process.ts b/src/statistics/features/supported/expression-list/post-process.ts index 7b6f7657c0..b3e3c42580 100644 --- a/src/statistics/features/supported/expression-list/post-process.ts +++ b/src/statistics/features/supported/expression-list/post-process.ts @@ -7,7 +7,7 @@ import { emptySummarizedWithProject, recordFilePath } from '../../post-processing'; -import type { ExpressionListInfo } from './expression-list'; +import type { ExpressionListInfo } from './statistics-expression-list'; import fs from 'fs'; import path from 'path'; import type { StatisticsSummarizerConfiguration } from '../../../summarizer/summarizer'; diff --git a/src/statistics/features/supported/expression-list/expression-list.ts b/src/statistics/features/supported/expression-list/statistics-expression-list.ts similarity index 100% rename from src/statistics/features/supported/expression-list/expression-list.ts rename to src/statistics/features/supported/expression-list/statistics-expression-list.ts diff --git a/test/functionality/_helper/label.ts b/test/functionality/_helper/label.ts index 400a029d81..743c6ffa0c 100644 --- 
a/test/functionality/_helper/label.ts +++ b/test/functionality/_helper/label.ts @@ -66,7 +66,11 @@ export function label(testname: string, ids?: readonly SupportedFlowrCapabilityI } function getFullNameOfLabel(label: TestLabel): string { - return `#${label.id} ${label.name} [${[...label.capabilities].join(', ')}]`; + if(label.capabilities.size === 0) { + return `#${label.id} ${label.name}`; + } else { + return `#${label.id} ${label.name} [${[...label.capabilities].join(', ')}]`; + } } diff --git a/test/functionality/_helper/query.ts b/test/functionality/_helper/query.ts index 1ffc057cac..0aaf48c8bb 100644 --- a/test/functionality/_helper/query.ts +++ b/test/functionality/_helper/query.ts @@ -5,7 +5,7 @@ import { PipelineExecutor } from '../../../src/core/pipeline-executor'; import { DEFAULT_DATAFLOW_PIPELINE } from '../../../src/core/steps/pipeline/default-pipelines'; import { requestFromInput } from '../../../src/r-bridge/retriever'; import { deterministicCountingIdGenerator } from '../../../src/r-bridge/lang-4.x/ast/model/processing/decorate'; -import type { QueriesResult, Query } from '../../../src/queries/query'; +import type { QueryResults, Query, QueryResultsWithoutMeta } from '../../../src/queries/query'; import { executeQueries } from '../../../src/queries/query'; import { assert } from 'chai'; import type { VirtualQueryArgumentsWithType } from '../../../src/queries/virtual-query/virtual-queries'; @@ -13,18 +13,15 @@ import type { TestLabel } from './label'; import { decorateLabelContext } from './label'; import type { VirtualCompoundConstraint } from '../../../src/queries/virtual-query/compound-query'; -type OmitFromValues = { - [P in keyof T]: Omit -} -function normalizeResults(result: QueriesResult): OmitFromValues, '.meta'>, '.meta'> { - const normalized = {} as OmitFromValues, '.meta'>, '.meta'>; - for(const key of Object.keys(result) as (keyof QueriesResult)[]) { +function normalizeResults(result: QueryResults): QueryResultsWithoutMeta { + const 
normalized = {} as QueryResultsWithoutMeta; + for(const key of Object.keys(result) as (keyof QueryResults)[]) { if(key === '.meta') { continue; } - const normalizedChild = {} as Omit, '.meta'>[typeof key]; - for(const childKey of Object.keys(result[key]) as (keyof QueriesResult[typeof key])[]) { + const normalizedChild = {} as Omit, '.meta'>[typeof key]; + for(const childKey of Object.keys(result[key]) as (keyof QueryResults[typeof key])[]) { if(childKey === '.meta') { continue; } @@ -48,7 +45,7 @@ export function assertQuery< Queries extends Query, VirtualArguments extends VirtualCompoundConstraint = VirtualCompoundConstraint >(name: string | TestLabel, shell: RShell, code: string, queries: readonly (Queries | VirtualQueryArgumentsWithType)[], expected: - OmitFromValues, '.meta'>, '.meta'> + QueryResultsWithoutMeta ) { const effectiveName = decorateLabelContext(name, ['query']); @@ -63,7 +60,7 @@ export function assertQuery< const result = executeQueries(graph, queries); // TODO: demote to logger - console.log(`total query time: ${result['.meta'].timing.toFixed(1)}ms (~1ms accuracy)`); + console.log(`total query time: ${result['.meta'].timing.toFixed(0)}ms (~1ms accuracy)`); const normalized = normalizeResults(result); /* expect them to be deeply equal */ diff --git a/test/functionality/benchmark/slicer.spec.ts b/test/functionality/benchmark/slicer.spec.ts index 3ce6c318ea..9589324ebd 100644 --- a/test/functionality/benchmark/slicer.spec.ts +++ b/test/functionality/benchmark/slicer.spec.ts @@ -48,7 +48,7 @@ describe('Benchmark Slicer', () => { assert.deepStrictEqual(stats.dataflow, { numberOfNodes: 3, // the defined variable, the reading ref, and the call - numberOfEdges: 4, // the defined-by edge and the arguments + numberOfEdges: 5, // the defined-by edge and the arguments + built in read numberOfCalls: 1, // `<-` numberOfFunctionDefinitions: 0, // no definitions sizeOfObject: 380 @@ -115,7 +115,7 @@ cat(d)` }, statInfo); assert.deepStrictEqual(stats.dataflow, { 
numberOfNodes: 23, - numberOfEdges: 29, + numberOfEdges: 38, numberOfCalls: 9, numberOfFunctionDefinitions: 0, sizeOfObject: 3053 diff --git a/test/functionality/dataflow/query/call-context-query-tests.ts b/test/functionality/dataflow/query/call-context-query-tests.ts index f523cb9788..192e9d7423 100644 --- a/test/functionality/dataflow/query/call-context-query-tests.ts +++ b/test/functionality/dataflow/query/call-context-query-tests.ts @@ -1,32 +1,97 @@ +import type { + CallContextQuery, + CallContextQueryKindResult, + CallContextQuerySubKindResult } from '../../../../src/queries/call-context-query/call-context-query-format'; import { CallTargets } from '../../../../src/queries/call-context-query/call-context-query-format'; + + import { withShell } from '../../_helper/shell'; import { assertQuery } from '../../_helper/query'; import { label } from '../../_helper/label'; +import type { QueryResultsWithoutMeta } from '../../../../src/queries/query'; + +/** simple query shortcut */ +function q(callName: RegExp | string, c: Partial = {}): CallContextQuery { + return { + type: 'call-context', + kind: 'test-kind', + subkind: 'test-subkind', + callName: callName, + ...c + }; +} + +function baseResult(kinds: CallContextQueryKindResult): QueryResultsWithoutMeta { + return { + 'call-context': { + kinds + } + }; +} + +/** simple result shortcut */ +function r(results: readonly CallContextQuerySubKindResult[], kind = 'test-kind', subkind = 'test-subkind'): QueryResultsWithoutMeta { + return baseResult({ + [kind]: { + subkinds: { + [subkind]: results + } + } + }); +} -/** TODO: check what happens if builtin if may be override */ +/* TODO: check what happens if builtin if may be override */ +// TODO: documentation describe('Call Context Query', withShell(shell => { - assertQuery(label('Print calls'), - shell, 'print(1)', [{ - type: 'call-context', - callName: /print/, - kind: 'visualize', - subkind: 'print', - callTargets: CallTargets.OnlyGlobal - }], { - 'call-context': { - 
kinds: { - 'visualize': { - subkinds: { - 'print': [{ - id: 3, - calls: [] - }] - } - } + function testQuery(name: string, code: string, query: readonly CallContextQuery[], expected: QueryResultsWithoutMeta) { + assertQuery(label(name), shell, code, query, expected); + } + testQuery('No Call', '1', [q(/print/)], baseResult({})); + testQuery('No Call (Symbol)', 'print', [q(/print/)], baseResult({})); + testQuery('No Call (Symbol)', 'print <- 3', [q(/print/)], baseResult({})); + testQuery('No Wanted Call', 'cat()', [q(/print/)], baseResult({})); + describe('Local Targets', () => { + testQuery('Happy Foo(t)', 'foo <- function(){}\nfoo()', [q(/foo/)], r([{ id: 7 }])); + testQuery('Happy Foo(t) (only local)', 'foo <- function(){}\nfoo()', [q(/foo/, { callTargets: CallTargets.OnlyLocal })], r([{ id: 7, calls: [4] }])); + testQuery('Happy Foo(t) (only global)', 'foo <- function(){}\nfoo()', [q(/foo/, { callTargets: CallTargets.OnlyGlobal })], baseResult({})); + testQuery('Happy Foo(t) (two local candidates)', 'if(x) { foo <- function(){} } else { foo <- function(){} }\nfoo()', [q(/foo/, { callTargets: CallTargets.OnlyLocal })], r([{ id: 21, calls: [16, 7] }])); + }); + describe('Global Targets', () => { + testQuery('Print calls', 'print(1)', [q(/print/)], r([{ id: 3 }])); + testQuery('Non-Alph calls', 'x <- 2', [q(/<-/)], r([{ id: 2 }])); + testQuery('Built-In calls', 'if(x) 3 else 2', [q(/if/)], r([{ id: 5 }])); + testQuery('Multiple wanted Calls', 'print(1); print(2)', [q(/print/)], r([{ id: 3 }, { id: 7 }])); + testQuery('Print calls (global)', 'print(1)', [q(/print/, { callTargets: CallTargets.OnlyGlobal })], r([{ id: 3, calls: [] }])); + testQuery('Higher-Order Calls', 'lapply(c(1,2,3),print)', [q(/print/)], r([{ id: 10 }])); + }); + /* TODO: normal test for maybe scope overshadow: x <- 3, f <- function() { if(y) { x <- 2; } print(x) }, f() */ + // TODO: local and global; nested calls, using quote, ... 
+ describe('Mixed Targets', () => { + const code = 'if(x) { print <- function() {} }\nprint()'; + testQuery('May be local or global', code, [q(/print/)], r([{ id: 12 }])); + testQuery('May be local or global (only local)', code, [q(/print/, { callTargets: CallTargets.OnlyLocal })], baseResult({})); + testQuery('May be local or global (only global)', code, [q(/print/, { callTargets: CallTargets.OnlyGlobal })], baseResult({})); + }); + describe('Linked Calls', () => { + // with one finding its parent, and one that does not + }); + describe('Multiple Kinds', () => { + testQuery('Multiple Kinds', 'print(1); foo(2)', [q(/print/, { kind: 'print-kind' }), q(/foo/, { kind: 'foo-kind' })], baseResult({ + 'print-kind': { subkinds: { 'test-subkind': [{ id: 3 }] } }, + 'foo-kind': { subkinds: { 'test-subkind': [{ id: 7 }] } } + })); + }); + describe('Multiple Sub-Kinds', () => { + testQuery('Multiple Sub-Kinds', 'print(1); foo(2)', [q(/print/, { subkind: 'print-subkind' }), q(/foo/, { subkind: 'foo-subkind' })], baseResult({ + 'test-kind': { + subkinds: { + 'print-subkind': [{ id: 3 }], + 'foo-subkind': [{ id: 7 }] } } - }); + })); + }); })); diff --git a/test/functionality/dataflow/query/compound-query-tests.ts b/test/functionality/dataflow/query/compound-query-tests.ts index 847601046e..f524fa96f5 100644 --- a/test/functionality/dataflow/query/compound-query-tests.ts +++ b/test/functionality/dataflow/query/compound-query-tests.ts @@ -24,9 +24,7 @@ describe('Compound Query', withShell(shell => { kinds: { 'visualize': { subkinds: { - 'print': [{ - id: 3, - }] + 'print': [{ id: 3 }, { id: 7 }] } } } diff --git a/test/functionality/lineage/lineage.spec.ts b/test/functionality/lineage/lineage.spec.ts index 5bbd6a8a5d..1bde7659ec 100644 --- a/test/functionality/lineage/lineage.spec.ts +++ b/test/functionality/lineage/lineage.spec.ts @@ -10,6 +10,7 @@ import type { NodeId } from '../../../src/r-bridge/lang-4.x/ast/model/processing import { assert } from 'chai'; import { setEquals } 
from '../../../src/util/set'; import { OperatorDatabase } from '../../../src/r-bridge/lang-4.x/ast/model/operators'; +import { BuiltIn } from '../../../src/dataflow/environments/built-in'; describe('Test lineage', withShell(shell => { @@ -30,5 +31,5 @@ describe('Test lineage', withShell(shell => { 'name-normal', ...OperatorDatabase['<-'].capabilities, 'newlines' ]), `c <- x b <- c -a <- b`, '3@a', [0, 1, 2, 3, 4, 5, 6, 7, 8]); +a <- b`, '3@a', [0, 1, 2, 3, 4, 5, 6, 7, 8, BuiltIn]); })); From 7bab1878238bcd81a96bb44e0d4c94979f64b707 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 21 Sep 2024 21:39:41 +0200 Subject: [PATCH 09/41] feat: only(Global|Local) call targets --- src/dataflow/graph/dataflowgraph-builder.ts | 3 -- src/dataflow/graph/graph.ts | 4 ++- .../functions/call/named-call-handling.ts | 6 ---- .../call-context-query-executor.ts | 35 ++++++++++++++----- test/functionality/benchmark/slicer.spec.ts | 4 +-- test/functionality/lineage/lineage.spec.ts | 3 +- 6 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/dataflow/graph/dataflowgraph-builder.ts b/src/dataflow/graph/dataflowgraph-builder.ts index 77a8c57ced..fe2ac4448e 100644 --- a/src/dataflow/graph/dataflowgraph-builder.ts +++ b/src/dataflow/graph/dataflowgraph-builder.ts @@ -96,9 +96,6 @@ export class DataflowGraphBuilder extends DataflowGraph { this.reads(id, call); } } - if(info?.onlyBuiltIn || onlyBuiltInAuto) { - this.calls(id, BuiltIn); - } return this; } diff --git a/src/dataflow/graph/graph.ts b/src/dataflow/graph/graph.ts index de4d6817aa..e6bfa33d1f 100644 --- a/src/dataflow/graph/graph.ts +++ b/src/dataflow/graph/graph.ts @@ -22,6 +22,7 @@ import { initializeCleanEnvironments } from '../environments/environment'; import type { AstIdMap } from '../../r-bridge/lang-4.x/ast/model/processing/decorate'; import { cloneEnvironmentInformation } from '../environments/clone'; import { jsonReplacer } from '../../util/json'; +import { BuiltIn } from '../environments/built-in'; 
export type DataflowFunctionFlowInformation = Omit & { graph: Set } @@ -257,12 +258,13 @@ export class DataflowGraph< * Will insert a new edge into the graph, * if the direction of the edge is of no importance (`same-read-read` or `same-def-def`), source * and target will be sorted so that `from` has the lower, and `to` the higher id (default ordering). + * Please note that this will never make edges to {@link BuiltIn} as they are not part of the graph. */ public addEdge(from: NodeId | ReferenceForEdge, to: NodeId | ReferenceForEdge, edgeInfo: EdgeData): this { const { fromId, toId } = extractEdgeIds(from, to); const { type, ...rest } = edgeInfo; - if(fromId === toId) { + if(fromId === toId || toId === BuiltIn) { return this; } diff --git a/src/dataflow/internal/process/functions/call/named-call-handling.ts b/src/dataflow/internal/process/functions/call/named-call-handling.ts index 1c395d701c..0dd8ed014f 100644 --- a/src/dataflow/internal/process/functions/call/named-call-handling.ts +++ b/src/dataflow/internal/process/functions/call/named-call-handling.ts @@ -9,7 +9,6 @@ import type { RSymbol } from '../../../../../r-bridge/lang-4.x/ast/model/nodes/r import type { NodeId } from '../../../../../r-bridge/lang-4.x/ast/model/processing/node-id'; import { resolveByName } from '../../../../environments/resolve-by-name'; import { VertexType } from '../../../../graph/vertex'; -import { EdgeType } from '../../../../graph/edge'; function mergeInformation(info: DataflowInformation | undefined, newInfo: DataflowInformation): DataflowInformation { @@ -57,11 +56,6 @@ export function processNamedCall( if(resolvedFunction.kind === 'built-in-function') { builtIn = true; information = mergeInformation(information, resolvedFunction.processor(name, args, rootId, data)); - /* add the built-in resolve edge */ - const vert = information.graph.getVertex(rootId); - if(vert && vert.tag === VertexType.FunctionCall) { - information.graph.addEdge(rootId, resolvedFunction.nodeId, { type: 
EdgeType.Calls }); - } } else { defaultProcessor = true; } diff --git a/src/queries/call-context-query/call-context-query-executor.ts b/src/queries/call-context-query/call-context-query-executor.ts index 7cf5f351a7..72de1d4707 100644 --- a/src/queries/call-context-query/call-context-query-executor.ts +++ b/src/queries/call-context-query/call-context-query-executor.ts @@ -11,6 +11,8 @@ import { VertexType } from '../../dataflow/graph/vertex'; import { assertUnreachable } from '../../util/assert'; import { edgeIncludesType, EdgeType } from '../../dataflow/graph/edge'; import type { DeepWritable } from 'ts-essentials'; +import { resolveByName } from '../../dataflow/environments/resolve-by-name'; +import { BuiltIn } from '../../dataflow/environments/built-in'; class TwoLayerCollector { readonly store = new Map>(); @@ -55,25 +57,40 @@ class TwoLayerCollector { function satisfiesCallTargets(id: NodeId, graph: DataflowGraph, callTarget: CallTargets): NodeId[] | 'no' { const callVertex = graph.get(id); - if(callVertex === undefined) { + if(callVertex === undefined || callVertex[0].tag !== VertexType.FunctionCall) { return 'no'; } - const [,outgoing] = callVertex; - const baseCallTargets = [...outgoing] + const [info,outgoing] = callVertex; + const callTargets = [...outgoing] .filter(([, e]) => edgeIncludesType(e.types, EdgeType.Calls)) + .map(([t]) => t) ; - console.log(baseCallTargets); + if(callTarget === CallTargets.Any) { + return callTargets; + } else if(info.environment === undefined) { + /* if there is no environment, we are a built-in only */ + return callTarget === CallTargets.OnlyGlobal ? callTargets : 'no'; + } + + let builtIn = false; + + /* + * for performance and scoping reasons, flowR will not identify the global linkage, + * including any potential built-in mapping. 
+ */ + const reResolved = resolveByName(info.name, info.environment); + if(reResolved && reResolved.some(t => t.definedAt === BuiltIn)) { + builtIn = true; + } - const callTargets = baseCallTargets.map(([t]) => t); + console.log('builtIn', builtIn, callTargets); switch(callTarget) { - case CallTargets.Any: - return callTargets; case CallTargets.OnlyGlobal: - return callTargets.length === 0 ? callTargets : 'no'; + return builtIn && callTargets.length === 0 ? callTargets : 'no'; case CallTargets.OnlyLocal: - return callTargets.length > 0 ? callTargets : 'no'; + return !builtIn && callTargets.length > 0 ? callTargets : 'no'; default: assertUnreachable(callTarget); } diff --git a/test/functionality/benchmark/slicer.spec.ts b/test/functionality/benchmark/slicer.spec.ts index 9589324ebd..3ce6c318ea 100644 --- a/test/functionality/benchmark/slicer.spec.ts +++ b/test/functionality/benchmark/slicer.spec.ts @@ -48,7 +48,7 @@ describe('Benchmark Slicer', () => { assert.deepStrictEqual(stats.dataflow, { numberOfNodes: 3, // the defined variable, the reading ref, and the call - numberOfEdges: 5, // the defined-by edge and the arguments + built in read + numberOfEdges: 4, // the defined-by edge and the arguments numberOfCalls: 1, // `<-` numberOfFunctionDefinitions: 0, // no definitions sizeOfObject: 380 @@ -115,7 +115,7 @@ cat(d)` }, statInfo); assert.deepStrictEqual(stats.dataflow, { numberOfNodes: 23, - numberOfEdges: 38, + numberOfEdges: 29, numberOfCalls: 9, numberOfFunctionDefinitions: 0, sizeOfObject: 3053 diff --git a/test/functionality/lineage/lineage.spec.ts b/test/functionality/lineage/lineage.spec.ts index 1bde7659ec..5bbd6a8a5d 100644 --- a/test/functionality/lineage/lineage.spec.ts +++ b/test/functionality/lineage/lineage.spec.ts @@ -10,7 +10,6 @@ import type { NodeId } from '../../../src/r-bridge/lang-4.x/ast/model/processing import { assert } from 'chai'; import { setEquals } from '../../../src/util/set'; import { OperatorDatabase } from 
'../../../src/r-bridge/lang-4.x/ast/model/operators'; -import { BuiltIn } from '../../../src/dataflow/environments/built-in'; describe('Test lineage', withShell(shell => { @@ -31,5 +30,5 @@ describe('Test lineage', withShell(shell => { 'name-normal', ...OperatorDatabase['<-'].capabilities, 'newlines' ]), `c <- x b <- c -a <- b`, '3@a', [0, 1, 2, 3, 4, 5, 6, 7, 8, BuiltIn]); +a <- b`, '3@a', [0, 1, 2, 3, 4, 5, 6, 7, 8]); })); From 9a3d2c5f8340fe4d2545e76bb9d52fc9e546a712 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 21 Sep 2024 22:57:48 +0200 Subject: [PATCH 10/41] feat-fix: potential scope-change re-resolve --- .../call/built-in/built-in-expression-list.ts | 4 ++-- .../internal/process/functions/call/common.ts | 10 ++++++++++ .../call-context-query-executor.ts | 6 +++++- .../call-context-query-format.ts | 4 ++++ .../dataflow-function-definition-tests.ts | 17 +++++++++++++++++ 5 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts index 90ae00b76a..84a34fa8af 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts @@ -3,7 +3,7 @@ * @module */ import type { DataflowInformation, ExitPoint } from '../../../../../info'; -import { addNonDefaultExitPoints, alwaysExits, ExitPointType } from '../../../../../info'; +import { happensInEveryBranch , addNonDefaultExitPoints, alwaysExits, ExitPointType } from '../../../../../info'; import type { DataflowProcessorInformation } from '../../../../../processor'; import { processDataflowFor } from '../../../../../processor'; import { linkFunctionCalls } from '../../../../linker'; @@ -34,7 +34,7 @@ function linkReadNameToWriteIfPossible(read: IdentifierReference, environments: const probableTarget = readName ? 
resolveByName(readName, environments) : undefined; // record if at least one has not been defined - if(probableTarget === undefined || probableTarget.some(t => !listEnvironments.has(t.nodeId))) { + if(probableTarget === undefined || probableTarget.some(t => !listEnvironments.has(t.nodeId) || !happensInEveryBranch(t.controlDependencies))) { if(remainingRead.has(readName)) { remainingRead.get(readName)?.push(read); } else { diff --git a/src/dataflow/internal/process/functions/call/common.ts b/src/dataflow/internal/process/functions/call/common.ts index 380a3d6ed4..54f0107e0a 100644 --- a/src/dataflow/internal/process/functions/call/common.ts +++ b/src/dataflow/internal/process/functions/call/common.ts @@ -1,4 +1,5 @@ import type { DataflowInformation } from '../../../../info'; +import { happensInEveryBranch } from '../../../../info'; import type { DataflowProcessorInformation } from '../../../../processor'; import { processDataflowFor } from '../../../../processor'; import type { RNode } from '../../../../../r-bridge/lang-4.x/ast/model/model'; @@ -61,6 +62,7 @@ function forceVertexArgumentValueReferences(rootId: NodeId, value: DataflowInfor const containedSubflowIn: readonly DataflowGraphVertexFunctionDefinition[] = [...graph.vertices(true)] .filter(([, info]) => isFunctionDefinitionVertex(info)) .flatMap(([, info]) => (info as DataflowGraphVertexFunctionDefinition)); + // try to resolve them against the current environment for(const ref of [...value.in, ...containedSubflowIn.flatMap(n => n.subflow.in)]) { if(ref.name) { @@ -106,9 +108,17 @@ export function processAllArguments( if(tryToResolve === undefined) { remainingReadInArgs.push(ingoing); } else { + /* maybe all targets are not definitely of the current scope and should be still kept */ + let assumeItMayHaveAHigherTarget = true; for(const resolved of tryToResolve) { + if(happensInEveryBranch(resolved.controlDependencies)) { + assumeItMayHaveAHigherTarget = false; + } finalGraph.addEdge(ingoing.nodeId, 
resolved.nodeId, { type: EdgeType.Reads }); } + if(assumeItMayHaveAHigherTarget) { + remainingReadInArgs.push(ingoing); + } } } argEnv = overwriteEnvironment(argEnv, processed.environment); diff --git a/src/queries/call-context-query/call-context-query-executor.ts b/src/queries/call-context-query/call-context-query-executor.ts index 72de1d4707..b1de49095c 100644 --- a/src/queries/call-context-query/call-context-query-executor.ts +++ b/src/queries/call-context-query/call-context-query-executor.ts @@ -70,7 +70,7 @@ function satisfiesCallTargets(id: NodeId, graph: DataflowGraph, callTarget: Call return callTargets; } else if(info.environment === undefined) { /* if there is no environment, we are a built-in only */ - return callTarget === CallTargets.OnlyGlobal ? callTargets : 'no'; + return callTarget === CallTargets.OnlyGlobal || callTarget === CallTargets.MustIncludeGlobal ? callTargets : 'no'; } let builtIn = false; @@ -89,8 +89,12 @@ function satisfiesCallTargets(id: NodeId, graph: DataflowGraph, callTarget: Call switch(callTarget) { case CallTargets.OnlyGlobal: return builtIn && callTargets.length === 0 ? callTargets : 'no'; + case CallTargets.MustIncludeGlobal: + return builtIn ? callTargets : 'no'; case CallTargets.OnlyLocal: return !builtIn && callTargets.length > 0 ? callTargets : 'no'; + case CallTargets.MustIncludeLocal: + return callTargets.length > 0 ? 
callTargets : 'no'; default: assertUnreachable(callTarget); } diff --git a/src/queries/call-context-query/call-context-query-format.ts b/src/queries/call-context-query/call-context-query-format.ts index 61dadca3bf..76a5ffd3c8 100644 --- a/src/queries/call-context-query/call-context-query-format.ts +++ b/src/queries/call-context-query/call-context-query-format.ts @@ -4,8 +4,12 @@ import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-i export const enum CallTargets { /** call targets a function that is not defined locally (e.g., the call targets a library function) */ OnlyGlobal = 'global', + /** call targets a function that is defined locally or globally, but must include a global function */ + MustIncludeGlobal = 'must-include-global', /** call targets a function that is defined locally */ OnlyLocal = 'local', + /** call targets a function that is defined locally or globally, but must include a local function */ + MustIncludeLocal = 'must-include-local', /** call targets a function that is defined locally or globally */ Any = 'any' } diff --git a/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-definition-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-definition-tests.ts index c9bf088ca6..6c912f8145 100644 --- a/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-definition-tests.ts +++ b/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-definition-tests.ts @@ -818,6 +818,23 @@ f(3)`, emptyGraph() expectIsSubgraph: true } ); + assertDataflow(label('Potential overwrite with Scope Change', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'numbers', 'formals-named', 'function-definitions', 'function-calls', 'if']), + shell, `function() { x <- 3 +function() { + if(y) x <- 2 + print(x) +}} + `, emptyGraph() + .defineVariable('1@x', undefined, undefined, false) + .defineVariable('3@x', undefined, { 
controlDependencies: [{ id: 12, when: true }] }, false) + .reads('4@x', '1@x') + .reads('4@x', '3@x') + , + { + expectIsSubgraph: true, + resolveIdsAsCriterion: true + } + ); }); describe('Reference escaping closures', () => { assertDataflow(label('Closure Factory', ['name-normal', ...OperatorDatabase['<-'].capabilities, 'normal-definition', 'implicit-return', 'newlines', 'numbers', 'call-normal']), From 5a305e0ba69b997e911ee5ed05cc73e6da20dd80 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sat, 21 Sep 2024 23:37:08 +0200 Subject: [PATCH 11/41] feat(call-context): handle quoted values --- .../functions/call/built-in/built-in-quote.ts | 4 ++ .../call-context-query-executor.ts | 53 ++++++++++++------- .../query/call-context-query-tests.ts | 15 ++++-- 3 files changed, 49 insertions(+), 23 deletions(-) diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-quote.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-quote.ts index 05ea8c0013..da256d394c 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-quote.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-quote.ts @@ -31,6 +31,10 @@ export function processQuote( unknownRefs.push(...processedArg.unknownReferences); } else if(processedArg) { information.graph.addEdge(rootId, processedArg.entryPoint, { type: EdgeType.NonStandardEvaluation }); + /* nse actually affects _everything_ within that argument! 
*/ + for(const [vtx,] of processedArg.graph.vertices(true)) { + information.graph.addEdge(rootId, vtx, { type: EdgeType.NonStandardEvaluation }); + } } } diff --git a/src/queries/call-context-query/call-context-query-executor.ts b/src/queries/call-context-query/call-context-query-executor.ts index b1de49095c..a47c5a1063 100644 --- a/src/queries/call-context-query/call-context-query-executor.ts +++ b/src/queries/call-context-query/call-context-query-executor.ts @@ -66,40 +66,53 @@ function satisfiesCallTargets(id: NodeId, graph: DataflowGraph, callTarget: Call .map(([t]) => t) ; - if(callTarget === CallTargets.Any) { - return callTargets; - } else if(info.environment === undefined) { - /* if there is no environment, we are a built-in only */ - return callTarget === CallTargets.OnlyGlobal || callTarget === CallTargets.MustIncludeGlobal ? callTargets : 'no'; - } - let builtIn = false; - /* - * for performance and scoping reasons, flowR will not identify the global linkage, - * including any potential built-in mapping. - */ - const reResolved = resolveByName(info.name, info.environment); - if(reResolved && reResolved.some(t => t.definedAt === BuiltIn)) { + if(info.environment === undefined) { + /* if we have a call with an unbound environment, + * this only happens if we are sure of built-in relations and want to save references + */ builtIn = true; + } else { + /* + * for performance and scoping reasons, flowR will not identify the global linkage, + * including any potential built-in mapping. + */ + const reResolved = resolveByName(info.name, info.environment); + if(reResolved && reResolved.some(t => t.definedAt === BuiltIn)) { + builtIn = true; + } } - console.log('builtIn', builtIn, callTargets); - switch(callTarget) { + case CallTargets.Any: + return callTargets; case CallTargets.OnlyGlobal: - return builtIn && callTargets.length === 0 ? callTargets : 'no'; + return builtIn && callTargets.length === 0 ? 
[BuiltIn] : 'no'; case CallTargets.MustIncludeGlobal: - return builtIn ? callTargets : 'no'; + return builtIn ? [...callTargets, BuiltIn] : 'no'; case CallTargets.OnlyLocal: return !builtIn && callTargets.length > 0 ? callTargets : 'no'; case CallTargets.MustIncludeLocal: - return callTargets.length > 0 ? callTargets : 'no'; + if(callTargets.length > 0) { + return builtIn ? [...callTargets, BuiltIn] : callTargets; + } else { + return 'no'; + } default: assertUnreachable(callTarget); } } +/* if the node is effected by nse, we have an ingoing nse edge */ +function isQuoted(node: NodeId, graph: DataflowGraph): boolean { + const vertex = graph.ingoingEdges(node); + if(vertex === undefined) { + return false; + } + return [...vertex.values()].some(({ types }) => edgeIncludesType(types, EdgeType.NonStandardEvaluation)); +} + function makeReport(collector: TwoLayerCollector): CallContextQueryKindResult { const result: CallContextQueryKindResult = {} as unknown as CallContextQueryKindResult; for(const [kind, collected] of collector.store) { @@ -157,6 +170,10 @@ export function executeCallContextQueries(graph: DataflowGraph, queries: readonl continue; } } + if(isQuoted(node, graph)) { + /* if the call is quoted, we do not want to link to it */ + continue; + } if(targets) { initialIdCollector.add(query.kind, query.subkind, [node, targets]); } else { diff --git a/test/functionality/dataflow/query/call-context-query-tests.ts b/test/functionality/dataflow/query/call-context-query-tests.ts index 192e9d7423..6d7c69b647 100644 --- a/test/functionality/dataflow/query/call-context-query-tests.ts +++ b/test/functionality/dataflow/query/call-context-query-tests.ts @@ -11,6 +11,7 @@ import { withShell } from '../../_helper/shell'; import { assertQuery } from '../../_helper/query'; import { label } from '../../_helper/label'; import type { QueryResultsWithoutMeta } from '../../../../src/queries/query'; +import { BuiltIn } from '../../../../src/dataflow/environments/built-in'; /** simple 
query shortcut */ @@ -51,29 +52,33 @@ describe('Call Context Query', withShell(shell => { } testQuery('No Call', '1', [q(/print/)], baseResult({})); testQuery('No Call (Symbol)', 'print', [q(/print/)], baseResult({})); - testQuery('No Call (Symbol)', 'print <- 3', [q(/print/)], baseResult({})); - testQuery('No Wanted Call', 'cat()', [q(/print/)], baseResult({})); + testQuery('No Call (Symbol, Definition)', 'print <- 3', [q(/print/)], baseResult({})); + testQuery('Unwanted Call', 'cat()', [q(/print/)], baseResult({})); + testQuery('Quoted Call', 'quote(print())', [q(/print/)], baseResult({})); describe('Local Targets', () => { testQuery('Happy Foo(t)', 'foo <- function(){}\nfoo()', [q(/foo/)], r([{ id: 7 }])); testQuery('Happy Foo(t) (only local)', 'foo <- function(){}\nfoo()', [q(/foo/, { callTargets: CallTargets.OnlyLocal })], r([{ id: 7, calls: [4] }])); + testQuery('Happy Foo(t) (incl. local)', 'foo <- function(){}\nfoo()', [q(/foo/, { callTargets: CallTargets.MustIncludeLocal })], r([{ id: 7, calls: [4] }])); testQuery('Happy Foo(t) (only global)', 'foo <- function(){}\nfoo()', [q(/foo/, { callTargets: CallTargets.OnlyGlobal })], baseResult({})); + testQuery('Happy Foo(t) (incl. 
global)', 'foo <- function(){}\nfoo()', [q(/foo/, { callTargets: CallTargets.MustIncludeGlobal })], baseResult({})); testQuery('Happy Foo(t) (two local candidates)', 'if(x) { foo <- function(){} } else { foo <- function(){} }\nfoo()', [q(/foo/, { callTargets: CallTargets.OnlyLocal })], r([{ id: 21, calls: [16, 7] }])); + testQuery('Nested Calls', 'foo <- function() { bar <- function() {}; bar() }\nfoo()', [q(/bar/)], r([{ id: 10 }])); }); describe('Global Targets', () => { testQuery('Print calls', 'print(1)', [q(/print/)], r([{ id: 3 }])); testQuery('Non-Alph calls', 'x <- 2', [q(/<-/)], r([{ id: 2 }])); testQuery('Built-In calls', 'if(x) 3 else 2', [q(/if/)], r([{ id: 5 }])); testQuery('Multiple wanted Calls', 'print(1); print(2)', [q(/print/)], r([{ id: 3 }, { id: 7 }])); - testQuery('Print calls (global)', 'print(1)', [q(/print/, { callTargets: CallTargets.OnlyGlobal })], r([{ id: 3, calls: [] }])); + testQuery('Print calls (global)', 'print(1)', [q(/print/, { callTargets: CallTargets.OnlyGlobal })], r([{ id: 3, calls: [BuiltIn] }])); testQuery('Higher-Order Calls', 'lapply(c(1,2,3),print)', [q(/print/)], r([{ id: 10 }])); }); - /* TODO: normal test for maybe scope overshadow: x <- 3, f <- function() { if(y) { x <- 2; } print(x) }, f() */ - // TODO: local and global; nested calls, using quote, ... describe('Mixed Targets', () => { const code = 'if(x) { print <- function() {} }\nprint()'; testQuery('May be local or global', code, [q(/print/)], r([{ id: 12 }])); testQuery('May be local or global (only local)', code, [q(/print/, { callTargets: CallTargets.OnlyLocal })], baseResult({})); + testQuery('May be local or global (incl. local)', code, [q(/print/, { callTargets: CallTargets.MustIncludeLocal })], r([{ id: 12, calls: [7, BuiltIn] }])); testQuery('May be local or global (only global)', code, [q(/print/, { callTargets: CallTargets.OnlyGlobal })], baseResult({})); + testQuery('May be local or global (incl. 
global)', code, [q(/print/, { callTargets: CallTargets.MustIncludeGlobal })], r([{ id: 12, calls: [7, BuiltIn] }])); }); describe('Linked Calls', () => { // with one finding its parent, and one that does not From 8a8e324446fdb4462d621a11dec3c7d422ce0429 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sun, 22 Sep 2024 00:24:22 +0200 Subject: [PATCH 12/41] refactor: clean up call promotions --- src/queries/base-query-format.ts | 1 + .../call-context-query-executor.ts | 37 +++++++++++++++---- .../call-context-query-format.ts | 12 +++--- .../query/call-context-query-tests.ts | 3 +- 4 files changed, 36 insertions(+), 17 deletions(-) diff --git a/src/queries/base-query-format.ts b/src/queries/base-query-format.ts index b351810cf4..3201e13a00 100644 --- a/src/queries/base-query-format.ts +++ b/src/queries/base-query-format.ts @@ -1,3 +1,4 @@ + export interface BaseQueryFormat { /** used to select the query type :) */ readonly type: string; diff --git a/src/queries/call-context-query/call-context-query-executor.ts b/src/queries/call-context-query/call-context-query-executor.ts index a47c5a1063..de9d137679 100644 --- a/src/queries/call-context-query/call-context-query-executor.ts +++ b/src/queries/call-context-query/call-context-query-executor.ts @@ -2,9 +2,9 @@ import type { DataflowGraph } from '../../dataflow/graph/graph'; import type { CallContextQuery, CallContextQueryKindResult, - CallContextQueryResult } from './call-context-query-format'; -import { - CallTargets + CallContextQueryResult, SubCallContextQueryFormat +} from './call-context-query-format'; +import { CallTargets } from './call-context-query-format'; import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id'; import { VertexType } from '../../dataflow/graph/vertex'; @@ -137,6 +137,31 @@ function makeReport(collector: TwoLayerCollector { + if(isSubCallQuery(q)) { + return { + ...q, + callName: new RegExp(q.callName), + linkTo: { + ...q.linkTo, + /* we have to add another 
promotion layer whenever we add something without this call name */ + callName: new RegExp(q.linkTo.callName) + } + }; + } else { + return { + ...q, + callName: new RegExp(q.callName) + }; + } + }); +} + /** * Multi-stage call context query resolve. * @@ -152,11 +177,7 @@ export function executeCallContextQueries(graph: DataflowGraph, queries: readonl const initialIdCollector = new TwoLayerCollector(); /* promote all strings to regex patterns */ - const promotedQueries = queries.map(q => ({ - ...q, - callName: new RegExp(q.callName) - })); - + const promotedQueries = promoteQueryCallNames(queries); for(const [node, info] of graph.vertices(true)) { if(info.tag !== VertexType.FunctionCall) { diff --git a/src/queries/call-context-query/call-context-query-format.ts b/src/queries/call-context-query/call-context-query-format.ts index 76a5ffd3c8..2864a93393 100644 --- a/src/queries/call-context-query/call-context-query-format.ts +++ b/src/queries/call-context-query/call-context-query-format.ts @@ -32,23 +32,21 @@ export interface DefaultCallContextQueryFormat extends BaseQueryFormat { /** * Links the current call to the last call of the given kind. * This way, you can link a call like `points` to the latest graphics plot etc. - * Please note that this may still result in a standalone, unlinked result - * if we are unable to find a call of the given kind. */ interface LinkToLastCall extends BaseQueryFormat { - readonly type: 'link-to-last-call'; - /** Regex regarding the function name of the last call */ - readonly callName?: RegExp; + readonly type: 'link-to-last-call'; + /** Regex regarding the function name of the last call. Similar to {@link DefaultCallContextQueryFormat#callName}, strings are interpreted as a `RegExp`. 
*/ + readonly callName: RegExp | string; } type LinkTo = LinkToLastCall; -interface SubCallContextQueryFormat extends DefaultCallContextQueryFormat { +export interface SubCallContextQueryFormat extends DefaultCallContextQueryFormat { readonly linkTo: LinkTo; } - export interface CallContextQuerySubKindResult { + /** The id of the call vertex identified within the supplied dataflow graph */ readonly id: NodeId; /** * Ids of functions which are called by the respective function call, diff --git a/test/functionality/dataflow/query/call-context-query-tests.ts b/test/functionality/dataflow/query/call-context-query-tests.ts index 6d7c69b647..5dfd959ec8 100644 --- a/test/functionality/dataflow/query/call-context-query-tests.ts +++ b/test/functionality/dataflow/query/call-context-query-tests.ts @@ -44,7 +44,6 @@ function r(results: readonly CallContextQuerySubKindResult[], kind = 'test-kind' }); } -/* TODO: check what happens if builtin if may be override */ // TODO: documentation describe('Call Context Query', withShell(shell => { function testQuery(name: string, code: string, query: readonly CallContextQuery[], expected: QueryResultsWithoutMeta) { @@ -81,7 +80,7 @@ describe('Call Context Query', withShell(shell => { testQuery('May be local or global (incl. 
global)', code, [q(/print/, { callTargets: CallTargets.MustIncludeGlobal })], r([{ id: 12, calls: [7, BuiltIn] }])); }); describe('Linked Calls', () => { - // with one finding its parent, and one that does not + // TODO: with one finding its parent, and one that does not }); describe('Multiple Kinds', () => { testQuery('Multiple Kinds', 'print(1); foo(2)', [q(/print/, { kind: 'print-kind' }), q(/foo/, { kind: 'foo-kind' })], baseResult({ From 989301aae3963bb2c018bf9641446cd130683f78 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sun, 22 Sep 2024 01:13:30 +0200 Subject: [PATCH 13/41] wip: work on FDs --- src/dataflow/graph/vertex.ts | 10 +++++++++- src/dataflow/info.ts | 4 ++-- .../functions/call/built-in/built-in-apply.ts | 7 ++++++- .../call/built-in/built-in-function-definition.ts | 2 ++ .../internal/process/functions/call/common.ts | 2 ++ .../process/functions/call/known-call-handling.ts | 5 +++-- .../process/functions/call/unnamed-call-handling.ts | 8 +++++--- .../internal/process/functions/process-argument.ts | 4 +++- src/dataflow/internal/process/process-symbol.ts | 3 ++- src/dataflow/internal/process/process-value.ts | 3 ++- src/dataflow/processor.ts | 12 +++++++++++- src/util/mermaid/dfg.ts | 9 +++++---- 12 files changed, 52 insertions(+), 17 deletions(-) diff --git a/src/dataflow/graph/vertex.ts b/src/dataflow/graph/vertex.ts index 24e33ee618..e8840d2b0c 100644 --- a/src/dataflow/graph/vertex.ts +++ b/src/dataflow/graph/vertex.ts @@ -39,6 +39,11 @@ interface DataflowGraphVertexBase extends MergeableRecord { * See {@link IdentifierReference} */ controlDependencies: ControlDependency[] | undefined + /** + * This is propagated by the active {@link DataflowInformation} + * and contains all nodes that are potential dataflow predecessors of the current node. 
+ */ + flowDependencies: NodeId[] | undefined } export interface DataflowGraphValue extends DataflowGraphVertexBase { @@ -68,7 +73,10 @@ export interface DataflowGraphVertexFunctionCall extends DataflowGraphVertexBase */ readonly name: string args: FunctionArgument[] - /** a performance flag to indicate that the respective call is _only_ calling a builtin function without any df graph attached */ + /** + * This is a performance flag to indicate that the respective call is _only_ calling a builtin function without any df graph attached. + * Do not rely on it for informational value, it may over-approximate. + */ onlyBuiltin: boolean environment: REnvironmentInformation | undefined } diff --git a/src/dataflow/info.ts b/src/dataflow/info.ts index 25a1c3fc61..08bfa48a68 100644 --- a/src/dataflow/info.ts +++ b/src/dataflow/info.ts @@ -42,11 +42,11 @@ export interface DataflowCfgInformation { /** * The entry node into the subgraph */ - entryPoint: NodeId, + readonly entryPoint: NodeId, /** * All already identified exit points (active 'return'/'break'/'next'-likes) of the respective structure. 
*/ - exitPoints: readonly ExitPoint[] + readonly exitPoints: readonly ExitPoint[] } /** diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-apply.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-apply.ts index c82775112e..14ecafabc8 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-apply.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-apply.ts @@ -82,7 +82,12 @@ export function processApply( args: allOtherArguments, environment: data.environment, onlyBuiltin: false, - controlDependencies: data.controlDependencies + controlDependencies: data.controlDependencies, + /* + * the call happens after all arguments complete, however, + * as they are lazy the call is actually root-level for the FD edges, so we know nothing + */ + flowDependencies: [] }); for(const arg of processedArguments) { diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-function-definition.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-function-definition.ts index 65fca3f7d5..d30403340a 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-function-definition.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-function-definition.ts @@ -86,6 +86,8 @@ export function processFunctionDefinition( const outEnvironment = overwriteEnvironment(paramsEnvironments, bodyEnvironment); + /* TODO: continue flow dependencies foll all vertices + make it so call trace the exit points of the function ~> write specific tests ~> use for linkto */ + for(const read of remainingRead) { if(read.name) { subgraph.addVertex({ diff --git a/src/dataflow/internal/process/functions/call/common.ts b/src/dataflow/internal/process/functions/call/common.ts index 54f0107e0a..c135361575 100644 --- a/src/dataflow/internal/process/functions/call/common.ts +++ b/src/dataflow/internal/process/functions/call/common.ts @@ -95,6 +95,7 @@ export function 
processAllArguments( } const processed = processDataflowFor(arg, { ...data, environment: argEnv }); + /* is the argument to force? (inlined) */ if(arg.type === RType.Argument && arg.value && (forceArgs === 'all' || forceArgs[i]) && arg.value.type !== RType.Number && arg.value.type !== RType.String && arg.value.type !== RType.Logical) { forceVertexArgumentValueReferences(functionRootId, processed, processed.graph, argEnv); } @@ -155,6 +156,7 @@ export function patchFunctionCall( /* will be overwritten accordingly */ onlyBuiltin: false, controlDependencies: data.controlDependencies, + flowDependencies: [], args: argumentProcessResult.map(arg => arg === undefined ? EmptyArgument : { nodeId: arg.entryPoint, controlDependencies: undefined, call: undefined }) }); for(const arg of argumentProcessResult) { diff --git a/src/dataflow/internal/process/functions/call/known-call-handling.ts b/src/dataflow/internal/process/functions/call/known-call-handling.ts index a499fbe482..26c6bf48ec 100644 --- a/src/dataflow/internal/process/functions/call/known-call-handling.ts +++ b/src/dataflow/internal/process/functions/call/known-call-handling.ts @@ -71,7 +71,7 @@ export function processKnownFunctionCall( callArgs, remainingReadInArgs, processedArguments - } = processAllArguments({ functionName, args: processArgs, data, finalGraph, functionRootId: rootId, patchData, forceArgs }); + } = processAllArguments({ functionName, args: processArgs, data: { ...data, flowDependencies: [rootId] }, finalGraph, functionRootId: rootId, patchData, forceArgs }); if(markAsNSE) { markNonStandardEvaluationEdges(markAsNSE, processedArguments, finalGraph, rootId); } @@ -84,7 +84,8 @@ export function processKnownFunctionCall( /* will be overwritten accordingly */ onlyBuiltin: false, controlDependencies: data.controlDependencies, - args: reverseOrder ? [...callArgs].reverse() : callArgs + args: reverseOrder ? 
[...callArgs].reverse() : callArgs, + flowDependencies: data.flowDependencies }); if(hasUnknownSideEffect) { diff --git a/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts b/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts index 9d04c30a4d..fe2d65db5d 100644 --- a/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts +++ b/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts @@ -14,10 +14,11 @@ import { dataflowLogger } from '../../../../logger'; export const UnnamedFunctionCallPrefix = 'unnamed-function-call-'; export function processUnnamedFunctionCall(functionCall: RUnnamedFunctionCall, data: DataflowProcessorInformation): DataflowInformation { - const calledFunction = processDataflowFor(functionCall.calledFunction, data); + const functionRootId = functionCall.info.id; + + const calledFunction = processDataflowFor(functionCall.calledFunction, { ...data, flowDependencies: [functionRootId] }); const finalGraph = new DataflowGraph(data.completeAst.idMap); - const functionRootId = functionCall.info.id; const calledRootId = functionCall.calledFunction.info.id; const functionCallName = `${UnnamedFunctionCallPrefix}${functionRootId}`; dataflowLogger.debug(`Using ${functionRootId} as root for the unnamed function call`); @@ -34,7 +35,7 @@ export function processUnnamedFunctionCall(functionCall: RUnnamedFunc } = processAllArguments({ functionName: calledFunction, args: functionCall.arguments, - data, + data: { ...data, flowDependencies: [functionRootId] }, finalGraph, functionRootId /* we know the call is right there and fully resolved, there is no need to artificially force arguments as we identify them within the subtree */ @@ -48,6 +49,7 @@ export function processUnnamedFunctionCall(functionCall: RUnnamedFunc /* can never be a direct built-in-call */ onlyBuiltin: false, controlDependencies: data.controlDependencies, + flowDependencies: data.flowDependencies, args: callArgs // same reference }); 
diff --git a/src/dataflow/internal/process/functions/process-argument.ts b/src/dataflow/internal/process/functions/process-argument.ts index 431176b44b..5cf08e3fab 100644 --- a/src/dataflow/internal/process/functions/process-argument.ts +++ b/src/dataflow/internal/process/functions/process-argument.ts @@ -38,9 +38,11 @@ export function processFunctionArgument( graph.addVertex({ tag: VertexType.Use, id: argument.info.id, - controlDependencies: data.controlDependencies + controlDependencies: data.controlDependencies, + flowDependencies: data.flowDependencies }); entryPoint = argument.info.id; + data = { ...data, flowDependencies: [argument.info.id] }; } const ingoingRefs = [...value?.unknownReferences ?? [], ...value?.in ?? [], ...(name === undefined ? [] : [...name.in])]; diff --git a/src/dataflow/internal/process/process-symbol.ts b/src/dataflow/internal/process/process-symbol.ts index 752e2abf10..736976dfb6 100644 --- a/src/dataflow/internal/process/process-symbol.ts +++ b/src/dataflow/internal/process/process-symbol.ts @@ -20,7 +20,8 @@ export function processSymbol(symbol: RSymbol(value: RNodeWithParent, data: DataflowPr graph: new DataflowGraph(data.completeAst.idMap).addVertex({ tag: VertexType.Value, id: value.info.id, - controlDependencies: data.controlDependencies + controlDependencies: data.controlDependencies, + flowDependencies: data.flowDependencies }), exitPoints: [{ nodeId: value.info.id, type: ExitPointType.Default, controlDependencies: data.controlDependencies }], entryPoint: value.info.id diff --git a/src/dataflow/processor.ts b/src/dataflow/processor.ts index e1fafddcd7..e4353c8217 100644 --- a/src/dataflow/processor.ts +++ b/src/dataflow/processor.ts @@ -10,6 +10,7 @@ import type { import type { REnvironmentInformation } from './environments/environment'; import type { RParseRequest } from '../r-bridge/retriever'; import type { RNode } from '../r-bridge/lang-4.x/ast/model/model'; +import type { NodeId } from 
'../r-bridge/lang-4.x/ast/model/processing/node-id'; export interface DataflowProcessorInformation { /** @@ -35,9 +36,18 @@ export interface DataflowProcessorInformation { */ readonly referenceChain: string[] /** - * The chain of control-flow {@link NodeId}s that lead to the current node (e.g. of known ifs). + * The chain of control-flow {@link NodeId}s that lead to the current node (e.g., of known ifs). */ readonly controlDependencies: ControlDependency[] | undefined + /** + * Contains all node ids that are potential control flow predecessors of the entry point that we are currently able to identify. + * In general, this represents the flow dependency of a CFG. + * Every handler for an inner fold has to handle the predecessor property. + * Additionally, calls have to update the predecessor linkage of the call entry point. + * In short, this may be incomplete (e.g., in case of function calls). + * There is no intention of creating a total order with the FDs, in general we try to approximate the order of execution using a partial order. + */ + readonly flowDependencies: NodeId[] } export type DataflowProcessor> = (node: NodeType, data: DataflowProcessorInformation) => DataflowInformation diff --git a/src/util/mermaid/dfg.ts b/src/util/mermaid/dfg.ts index c3d01d2418..935c7e8631 100644 --- a/src/util/mermaid/dfg.ts +++ b/src/util/mermaid/dfg.ts @@ -107,7 +107,7 @@ function displayFunctionArgMapping(argMapping: readonly FunctionArgument[]): str } return result.length === 0 ? '' : `\n (${result.join(', ')})`; } -function encodeEdge(from: string, to: string, types: Set): string { +function encodeEdge(from: string, to: string, types: Set): string { return `${from}->${to}["${[...types].join(':')}"]`; } @@ -177,8 +177,9 @@ function vertexToMermaid(info: DataflowGraphVertexInfo, mermaid: MermaidGraph, i const edges = mermaid.rootGraph.get(id, true); guard(edges !== undefined, `node ${id} must be found`); - const artificialCdEdges = (info.controlDependencies ?? 
[]).map(x => [x.id, { types: new Set([x.when ? 'CD-True' : 'CD-False']) }] as const); - for(const [target, edge] of [...edges[1], ...artificialCdEdges]) { + const artificialCdEdges = (info.controlDependencies ?? []).map(x => [x.id, { types: new Set([x.when ? 'CD-True' : 'CD-False']) }] as const); + const artificialFdEdges = (info.flowDependencies ?? []).map(x => [x, { types: new Set(['FD']) }] as const); + for(const [target, edge] of [...edges[1], ...artificialCdEdges, ...artificialFdEdges]) { const edgeTypes = typeof edge.types == 'number' ? new Set(splitEdgeTypes(edge.types)) : edge.types; const edgeId = encodeEdge(idPrefix + id, idPrefix + target, edgeTypes); if(!mermaid.presentEdges.has(edgeId)) { @@ -188,7 +189,7 @@ function vertexToMermaid(info: DataflowGraphVertexInfo, mermaid: MermaidGraph, i // who invented this syntax?! mermaid.edgeLines.push(` linkStyle ${mermaid.presentEdges.size - 1} stroke:red,color:red,stroke-width:4px;`); } - if(edgeTypes.has('CD-True')) { + if(edgeTypes.has('CD-True') || edgeTypes.has('CD-False') || edgeTypes.has('FD')) { mermaid.edgeLines.push(` linkStyle ${mermaid.presentEdges.size - 1} stroke:gray,color:gray;`); } } From 7dd919a1329b63e9d8292764618b026891d6c382 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sun, 22 Sep 2024 08:10:08 +0200 Subject: [PATCH 14/41] Revert "wip: work on FDs" (Now tracked in #1005) This reverts commit 989301aae3963bb2c018bf9641446cd130683f78. 
--- src/dataflow/graph/vertex.ts | 10 +--------- src/dataflow/info.ts | 4 ++-- .../functions/call/built-in/built-in-apply.ts | 7 +------ .../call/built-in/built-in-function-definition.ts | 2 -- .../internal/process/functions/call/common.ts | 2 -- .../process/functions/call/known-call-handling.ts | 5 ++--- .../process/functions/call/unnamed-call-handling.ts | 8 +++----- .../internal/process/functions/process-argument.ts | 4 +--- src/dataflow/internal/process/process-symbol.ts | 3 +-- src/dataflow/internal/process/process-value.ts | 3 +-- src/dataflow/processor.ts | 12 +----------- src/util/mermaid/dfg.ts | 9 ++++----- 12 files changed, 17 insertions(+), 52 deletions(-) diff --git a/src/dataflow/graph/vertex.ts b/src/dataflow/graph/vertex.ts index e8840d2b0c..24e33ee618 100644 --- a/src/dataflow/graph/vertex.ts +++ b/src/dataflow/graph/vertex.ts @@ -39,11 +39,6 @@ interface DataflowGraphVertexBase extends MergeableRecord { * See {@link IdentifierReference} */ controlDependencies: ControlDependency[] | undefined - /** - * This is propagated by the active {@link DataflowInformation} - * and contains all nodes that are potential dataflow predecessors of the current node. - */ - flowDependencies: NodeId[] | undefined } export interface DataflowGraphValue extends DataflowGraphVertexBase { @@ -73,10 +68,7 @@ export interface DataflowGraphVertexFunctionCall extends DataflowGraphVertexBase */ readonly name: string args: FunctionArgument[] - /** - * This is a performance flag to indicate that the respective call is _only_ calling a builtin function without any df graph attached. - * Do not rely on it for informational value, it may over-approximate. 
- */ + /** a performance flag to indicate that the respective call is _only_ calling a builtin function without any df graph attached */ onlyBuiltin: boolean environment: REnvironmentInformation | undefined } diff --git a/src/dataflow/info.ts b/src/dataflow/info.ts index 08bfa48a68..25a1c3fc61 100644 --- a/src/dataflow/info.ts +++ b/src/dataflow/info.ts @@ -42,11 +42,11 @@ export interface DataflowCfgInformation { /** * The entry node into the subgraph */ - readonly entryPoint: NodeId, + entryPoint: NodeId, /** * All already identified exit points (active 'return'/'break'/'next'-likes) of the respective structure. */ - readonly exitPoints: readonly ExitPoint[] + exitPoints: readonly ExitPoint[] } /** diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-apply.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-apply.ts index 14ecafabc8..c82775112e 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-apply.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-apply.ts @@ -82,12 +82,7 @@ export function processApply( args: allOtherArguments, environment: data.environment, onlyBuiltin: false, - controlDependencies: data.controlDependencies, - /* - * the call happens after all arguments complete, however, - * as they are lazy the call is actually root-level for the FD edges, so we know nothing - */ - flowDependencies: [] + controlDependencies: data.controlDependencies }); for(const arg of processedArguments) { diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-function-definition.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-function-definition.ts index d30403340a..65fca3f7d5 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-function-definition.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-function-definition.ts @@ -86,8 +86,6 @@ export function processFunctionDefinition( const outEnvironment = 
overwriteEnvironment(paramsEnvironments, bodyEnvironment); - /* TODO: continue flow dependencies foll all vertices + make it so call trace the exit points of the function ~> write specific tests ~> use for linkto */ - for(const read of remainingRead) { if(read.name) { subgraph.addVertex({ diff --git a/src/dataflow/internal/process/functions/call/common.ts b/src/dataflow/internal/process/functions/call/common.ts index c135361575..54f0107e0a 100644 --- a/src/dataflow/internal/process/functions/call/common.ts +++ b/src/dataflow/internal/process/functions/call/common.ts @@ -95,7 +95,6 @@ export function processAllArguments( } const processed = processDataflowFor(arg, { ...data, environment: argEnv }); - /* is the argument to force? (inlined) */ if(arg.type === RType.Argument && arg.value && (forceArgs === 'all' || forceArgs[i]) && arg.value.type !== RType.Number && arg.value.type !== RType.String && arg.value.type !== RType.Logical) { forceVertexArgumentValueReferences(functionRootId, processed, processed.graph, argEnv); } @@ -156,7 +155,6 @@ export function patchFunctionCall( /* will be overwritten accordingly */ onlyBuiltin: false, controlDependencies: data.controlDependencies, - flowDependencies: [], args: argumentProcessResult.map(arg => arg === undefined ? 
EmptyArgument : { nodeId: arg.entryPoint, controlDependencies: undefined, call: undefined }) }); for(const arg of argumentProcessResult) { diff --git a/src/dataflow/internal/process/functions/call/known-call-handling.ts b/src/dataflow/internal/process/functions/call/known-call-handling.ts index 26c6bf48ec..a499fbe482 100644 --- a/src/dataflow/internal/process/functions/call/known-call-handling.ts +++ b/src/dataflow/internal/process/functions/call/known-call-handling.ts @@ -71,7 +71,7 @@ export function processKnownFunctionCall( callArgs, remainingReadInArgs, processedArguments - } = processAllArguments({ functionName, args: processArgs, data: { ...data, flowDependencies: [rootId] }, finalGraph, functionRootId: rootId, patchData, forceArgs }); + } = processAllArguments({ functionName, args: processArgs, data, finalGraph, functionRootId: rootId, patchData, forceArgs }); if(markAsNSE) { markNonStandardEvaluationEdges(markAsNSE, processedArguments, finalGraph, rootId); } @@ -84,8 +84,7 @@ export function processKnownFunctionCall( /* will be overwritten accordingly */ onlyBuiltin: false, controlDependencies: data.controlDependencies, - args: reverseOrder ? [...callArgs].reverse() : callArgs, - flowDependencies: data.flowDependencies + args: reverseOrder ? 
[...callArgs].reverse() : callArgs }); if(hasUnknownSideEffect) { diff --git a/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts b/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts index fe2d65db5d..9d04c30a4d 100644 --- a/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts +++ b/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts @@ -14,11 +14,10 @@ import { dataflowLogger } from '../../../../logger'; export const UnnamedFunctionCallPrefix = 'unnamed-function-call-'; export function processUnnamedFunctionCall(functionCall: RUnnamedFunctionCall, data: DataflowProcessorInformation): DataflowInformation { - const functionRootId = functionCall.info.id; - - const calledFunction = processDataflowFor(functionCall.calledFunction, { ...data, flowDependencies: [functionRootId] }); + const calledFunction = processDataflowFor(functionCall.calledFunction, data); const finalGraph = new DataflowGraph(data.completeAst.idMap); + const functionRootId = functionCall.info.id; const calledRootId = functionCall.calledFunction.info.id; const functionCallName = `${UnnamedFunctionCallPrefix}${functionRootId}`; dataflowLogger.debug(`Using ${functionRootId} as root for the unnamed function call`); @@ -35,7 +34,7 @@ export function processUnnamedFunctionCall(functionCall: RUnnamedFunc } = processAllArguments({ functionName: calledFunction, args: functionCall.arguments, - data: { ...data, flowDependencies: [functionRootId] }, + data, finalGraph, functionRootId /* we know the call is right there and fully resolved, there is no need to artificially force arguments as we identify them within the subtree */ @@ -49,7 +48,6 @@ export function processUnnamedFunctionCall(functionCall: RUnnamedFunc /* can never be a direct built-in-call */ onlyBuiltin: false, controlDependencies: data.controlDependencies, - flowDependencies: data.flowDependencies, args: callArgs // same reference }); diff --git 
a/src/dataflow/internal/process/functions/process-argument.ts b/src/dataflow/internal/process/functions/process-argument.ts index 5cf08e3fab..431176b44b 100644 --- a/src/dataflow/internal/process/functions/process-argument.ts +++ b/src/dataflow/internal/process/functions/process-argument.ts @@ -38,11 +38,9 @@ export function processFunctionArgument( graph.addVertex({ tag: VertexType.Use, id: argument.info.id, - controlDependencies: data.controlDependencies, - flowDependencies: data.flowDependencies + controlDependencies: data.controlDependencies }); entryPoint = argument.info.id; - data = { ...data, flowDependencies: [argument.info.id] }; } const ingoingRefs = [...value?.unknownReferences ?? [], ...value?.in ?? [], ...(name === undefined ? [] : [...name.in])]; diff --git a/src/dataflow/internal/process/process-symbol.ts b/src/dataflow/internal/process/process-symbol.ts index 736976dfb6..752e2abf10 100644 --- a/src/dataflow/internal/process/process-symbol.ts +++ b/src/dataflow/internal/process/process-symbol.ts @@ -20,8 +20,7 @@ export function processSymbol(symbol: RSymbol(value: RNodeWithParent, data: DataflowPr graph: new DataflowGraph(data.completeAst.idMap).addVertex({ tag: VertexType.Value, id: value.info.id, - controlDependencies: data.controlDependencies, - flowDependencies: data.flowDependencies + controlDependencies: data.controlDependencies }), exitPoints: [{ nodeId: value.info.id, type: ExitPointType.Default, controlDependencies: data.controlDependencies }], entryPoint: value.info.id diff --git a/src/dataflow/processor.ts b/src/dataflow/processor.ts index e4353c8217..e1fafddcd7 100644 --- a/src/dataflow/processor.ts +++ b/src/dataflow/processor.ts @@ -10,7 +10,6 @@ import type { import type { REnvironmentInformation } from './environments/environment'; import type { RParseRequest } from '../r-bridge/retriever'; import type { RNode } from '../r-bridge/lang-4.x/ast/model/model'; -import type { NodeId } from 
'../r-bridge/lang-4.x/ast/model/processing/node-id'; export interface DataflowProcessorInformation { /** @@ -36,18 +35,9 @@ export interface DataflowProcessorInformation { */ readonly referenceChain: string[] /** - * The chain of control-flow {@link NodeId}s that lead to the current node (e.g., of known ifs). + * The chain of control-flow {@link NodeId}s that lead to the current node (e.g. of known ifs). */ readonly controlDependencies: ControlDependency[] | undefined - /** - * Contains all node ids that are potential control flow predecessors of the entry point that we are currently able to identify. - * In general, this represents the flow dependency of a CFG. - * Every handler for an inner fold has to handle the predecessor property. - * Additionally, calls have to update the predecessor linkage of the call entry point. - * In short, this may be incomplete (e.g., in case of function calls). - * There is no intention of creating a total order with the FDs, in general we try to approximate the order of execution using a partial order. - */ - readonly flowDependencies: NodeId[] } export type DataflowProcessor> = (node: NodeType, data: DataflowProcessorInformation) => DataflowInformation diff --git a/src/util/mermaid/dfg.ts b/src/util/mermaid/dfg.ts index 935c7e8631..c3d01d2418 100644 --- a/src/util/mermaid/dfg.ts +++ b/src/util/mermaid/dfg.ts @@ -107,7 +107,7 @@ function displayFunctionArgMapping(argMapping: readonly FunctionArgument[]): str } return result.length === 0 ? '' : `\n (${result.join(', ')})`; } -function encodeEdge(from: string, to: string, types: Set): string { +function encodeEdge(from: string, to: string, types: Set): string { return `${from}->${to}["${[...types].join(':')}"]`; } @@ -177,9 +177,8 @@ function vertexToMermaid(info: DataflowGraphVertexInfo, mermaid: MermaidGraph, i const edges = mermaid.rootGraph.get(id, true); guard(edges !== undefined, `node ${id} must be found`); - const artificialCdEdges = (info.controlDependencies ?? 
[]).map(x => [x.id, { types: new Set([x.when ? 'CD-True' : 'CD-False']) }] as const); - const artificialFdEdges = (info.flowDependencies ?? []).map(x => [x, { types: new Set(['FD']) }] as const); - for(const [target, edge] of [...edges[1], ...artificialCdEdges, ...artificialFdEdges]) { + const artificialCdEdges = (info.controlDependencies ?? []).map(x => [x.id, { types: new Set([x.when ? 'CD-True' : 'CD-False']) }] as const); + for(const [target, edge] of [...edges[1], ...artificialCdEdges]) { const edgeTypes = typeof edge.types == 'number' ? new Set(splitEdgeTypes(edge.types)) : edge.types; const edgeId = encodeEdge(idPrefix + id, idPrefix + target, edgeTypes); if(!mermaid.presentEdges.has(edgeId)) { @@ -189,7 +188,7 @@ function vertexToMermaid(info: DataflowGraphVertexInfo, mermaid: MermaidGraph, i // who invented this syntax?! mermaid.edgeLines.push(` linkStyle ${mermaid.presentEdges.size - 1} stroke:red,color:red,stroke-width:4px;`); } - if(edgeTypes.has('CD-True') || edgeTypes.has('CD-False') || edgeTypes.has('FD')) { + if(edgeTypes.has('CD-True')) { mermaid.edgeLines.push(` linkStyle ${mermaid.presentEdges.size - 1} stroke:gray,color:gray;`); } } From 2292b90e9da460ff2eb6d87401365743ed9ff4ad Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sun, 22 Sep 2024 09:15:19 +0200 Subject: [PATCH 15/41] refactor: linkTo prerequisites --- .../call-context-query-executor.ts | 105 +++++++----------- .../call-context-query-format.ts | 22 ++-- src/queries/query.ts | 16 ++- src/queries/two-layer-collector.ts | 40 +++++++ src/util/cfg/cfg.ts | 2 - src/util/objects.ts | 16 +++ test/functionality/_helper/query.ts | 3 +- .../query/call-context-query-tests.ts | 2 +- 8 files changed, 122 insertions(+), 84 deletions(-) create mode 100644 src/queries/two-layer-collector.ts diff --git a/src/queries/call-context-query/call-context-query-executor.ts b/src/queries/call-context-query/call-context-query-executor.ts index de9d137679..9cbe1ca65b 100644 --- 
a/src/queries/call-context-query/call-context-query-executor.ts +++ b/src/queries/call-context-query/call-context-query-executor.ts @@ -2,7 +2,8 @@ import type { DataflowGraph } from '../../dataflow/graph/graph'; import type { CallContextQuery, CallContextQueryKindResult, - CallContextQueryResult, SubCallContextQueryFormat + CallContextQueryResult, CallContextQuerySubKindResult, + SubCallContextQueryFormat } from './call-context-query-format'; import { CallTargets } from './call-context-query-format'; @@ -10,50 +11,13 @@ import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-i import { VertexType } from '../../dataflow/graph/vertex'; import { assertUnreachable } from '../../util/assert'; import { edgeIncludesType, EdgeType } from '../../dataflow/graph/edge'; -import type { DeepWritable } from 'ts-essentials'; import { resolveByName } from '../../dataflow/environments/resolve-by-name'; import { BuiltIn } from '../../dataflow/environments/built-in'; - -class TwoLayerCollector { - readonly store = new Map>(); - - public add(layer1: Layer1, layer2: Layer2, value: Values) { - let layer2Map = this.store.get(layer1); - if(layer2Map === undefined) { - layer2Map = new Map(); - this.store.set(layer1, layer2Map); - } - let values = layer2Map.get(layer2); - if(values === undefined) { - values = []; - layer2Map.set(layer2, values); - } - values.push(value); - } - - public get(layer1: Layer1, layer2: Layer2): Values[] | undefined { - return this.store.get(layer1)?.get(layer2); - } - - public outerKeys(): Iterable { - return this.store.keys(); - } - - public innerKeys(layer1: Layer1): Iterable { - return this.store.get(layer1)?.keys() ?? 
[]; - } - - public asciiSummary() { - let result = ''; - for(const [layer1, layer2Map] of this.store) { - result += `${JSON.stringify(layer1)}\n`; - for(const [layer2, values] of layer2Map) { - result += ` ╰ ${JSON.stringify(layer2)}: ${JSON.stringify(values)}\n`; - } - } - return result; - } -} +import type { ControlFlowInformation } from '../../util/cfg/cfg'; +import { extractCFG } from '../../util/cfg/cfg'; +import { TwoLayerCollector } from '../two-layer-collector'; +import type { BasicQueryData } from '../query'; +import { compactRecord } from '../../util/objects'; function satisfiesCallTargets(id: NodeId, graph: DataflowGraph, callTarget: CallTargets): NodeId[] | 'no' { const callVertex = graph.get(id); @@ -113,21 +77,15 @@ function isQuoted(node: NodeId, graph: DataflowGraph): boolean { return [...vertex.values()].some(({ types }) => edgeIncludesType(types, EdgeType.NonStandardEvaluation)); } -function makeReport(collector: TwoLayerCollector): CallContextQueryKindResult { +function makeReport(collector: TwoLayerCollector): CallContextQueryKindResult { const result: CallContextQueryKindResult = {} as unknown as CallContextQueryKindResult; for(const [kind, collected] of collector.store) { - const subkinds = {} as DeepWritable; + const subkinds = {} as CallContextQueryKindResult[string]['subkinds']; for(const [subkind, values] of collected) { subkinds[subkind] ??= []; const collectIn = subkinds[subkind]; for(const value of values) { - const [id, calls] = value; - if(calls) { - collectIn.push({ id, calls }); - } else { - /* do not even provide the key! 
*/ - collectIn.push({ id }); - } + collectIn.push(value); } } result[kind] = { @@ -141,9 +99,11 @@ function isSubCallQuery(query: CallContextQuery): query is SubCallContextQueryFo return 'linkTo' in query; } -function promoteQueryCallNames(queries: readonly CallContextQuery[]) { - return queries.map(q => { +function promoteQueryCallNames(queries: readonly CallContextQuery[]): { promotedQueries: CallContextQuery[], requiresCfg: boolean } { + let requiresCfg = false; + const promotedQueries = queries.map(q => { if(isSubCallQuery(q)) { + requiresCfg = true; return { ...q, callName: new RegExp(q.callName), @@ -160,6 +120,13 @@ function promoteQueryCallNames(queries: readonly CallContextQuery[]) { }; } }); + + return { promotedQueries, requiresCfg }; +} + +function identifyLinkToRelation(cfg: ControlFlowInformation): NodeId[] { + /* TODO: */ + return []; } /** @@ -170,36 +137,46 @@ function promoteQueryCallNames(queries: readonly CallContextQuery[]) { * This happens during the main resolution! * 3. Attach `linkTo` calls to the respective calls. 
*/ -export function executeCallContextQueries(graph: DataflowGraph, queries: readonly CallContextQuery[]): CallContextQueryResult { +export function executeCallContextQueries({ graph, ast }: BasicQueryData, queries: readonly CallContextQuery[]): CallContextQueryResult { /* omit performance page load */ const now = Date.now(); /* the node id and call targets if present */ - const initialIdCollector = new TwoLayerCollector(); + const initialIdCollector = new TwoLayerCollector(); /* promote all strings to regex patterns */ - const promotedQueries = promoteQueryCallNames(queries); + const { promotedQueries, requiresCfg } = promoteQueryCallNames(queries); + + let cfg = undefined; + if(requiresCfg) { + cfg = extractCFG(ast); + } - for(const [node, info] of graph.vertices(true)) { + for(const [nodeId, info] of graph.vertices(true)) { if(info.tag !== VertexType.FunctionCall) { continue; } for(const query of promotedQueries.filter(q => q.callName.test(info.name))) { let targets: NodeId[] | 'no' | undefined = undefined; if(query.callTargets) { - targets = satisfiesCallTargets(node, graph, query.callTargets); + targets = satisfiesCallTargets(nodeId, graph, query.callTargets); if(targets === 'no') { continue; } } - if(isQuoted(node, graph)) { + if(isQuoted(nodeId, graph)) { /* if the call is quoted, we do not want to link to it */ continue; } - if(targets) { - initialIdCollector.add(query.kind, query.subkind, [node, targets]); - } else { - initialIdCollector.add(query.kind, query.subkind, [node]); + let linkedIds: NodeId[] | undefined = undefined; + if(cfg && isSubCallQuery(query)) { + /* if we have a linkTo query, we have to find the last call */ + const lastCall = identifyLinkToRelation(cfg); + if(lastCall) { + linkedIds = lastCall; + } } + + initialIdCollector.add(query.kind, query.subkind, compactRecord({ id: nodeId, calls: targets, linkedIds })); } } diff --git a/src/queries/call-context-query/call-context-query-format.ts 
b/src/queries/call-context-query/call-context-query-format.ts index 2864a93393..9d571d0e59 100644 --- a/src/queries/call-context-query/call-context-query-format.ts +++ b/src/queries/call-context-query/call-context-query-format.ts @@ -14,10 +14,10 @@ export const enum CallTargets { Any = 'any' } -export interface DefaultCallContextQueryFormat extends BaseQueryFormat { +export interface DefaultCallContextQueryFormat extends BaseQueryFormat { readonly type: 'call-context'; /** Regex regarding the function name, please note that strings will be interpreted as regular expressions too! */ - readonly callName: RegExp | string; + readonly callName: CallName; /** kind may be a step or anything that you attach to the call, this can be used to group calls together (e.g., linking `ggplot` to `visualize`) */ readonly kind: string; /** subkinds are used to uniquely identify the respective call type when grouping the output (e.g., the normalized name, linking `ggplot` to `plot`) */ @@ -32,17 +32,19 @@ export interface DefaultCallContextQueryFormat extends BaseQueryFormat { /** * Links the current call to the last call of the given kind. * This way, you can link a call like `points` to the latest graphics plot etc. + * For now, this uses the static Control-Flow-Graph produced by flowR as the FD over-approximation is still not stable (see #1005). + * In short, this means that we are unable to detect origins over function call boundaries but plan on being more precise in the future. */ -interface LinkToLastCall extends BaseQueryFormat { +interface LinkToLastCall extends BaseQueryFormat { readonly type: 'link-to-last-call'; /** Regex regarding the function name of the last call. Similar to {@link DefaultCallContextQueryFormat#callName}, strings are interpreted as a `RegExp`. 
*/ - readonly callName: RegExp | string; + readonly callName: CallName; } -type LinkTo = LinkToLastCall; +type LinkTo = LinkToLastCall; -export interface SubCallContextQueryFormat extends DefaultCallContextQueryFormat { - readonly linkTo: LinkTo; +export interface SubCallContextQueryFormat extends DefaultCallContextQueryFormat { + readonly linkTo: LinkTo; } export interface CallContextQuerySubKindResult { @@ -59,12 +61,12 @@ export interface CallContextQuerySubKindResult { } export type CallContextQueryKindResult = Record + /** maps each subkind to the results found, to be freely in the result form, this is mutable */ + subkinds: Record }> export interface CallContextQueryResult extends BaseQueryResult { readonly kinds: CallContextQueryKindResult; } -export type CallContextQuery = DefaultCallContextQueryFormat | SubCallContextQueryFormat; +export type CallContextQuery = DefaultCallContextQueryFormat | SubCallContextQueryFormat; diff --git a/src/queries/query.ts b/src/queries/query.ts index d659d8cbac..b3fc9cbb20 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -7,13 +7,19 @@ import type { VirtualQueryArgumentsWithType } from './virtual-query/virtual-quer import { SupportedVirtualQueries } from './virtual-query/virtual-queries'; import type { Writable } from 'ts-essentials'; import type { VirtualCompoundConstraint } from './virtual-query/compound-query'; +import type { NormalizedAst } from '../r-bridge/lang-4.x/ast/model/processing/decorate'; export type Query = CallContextQuery; export type QueryArgumentsWithType = Query & { type: QueryType }; +export interface BasicQueryData { + readonly ast: NormalizedAst; + readonly graph: DataflowGraph; +} + /* Each executor receives all queries of its type in case it wants to avoid repeated traversal */ -export type QueryExecutor = (graph: DataflowGraph, query: readonly Query[]) => Result; +export type QueryExecutor = (data: BasicQueryData, query: readonly Query[]) => Result; type SupportedQueries = { @@ -28,13 
+34,13 @@ export const SupportedQueries = { export type SupportedQueryTypes = keyof typeof SupportedQueries; export type QueryResult = ReturnType; -export function executeQueriesOfSameType(graph: DataflowGraph, ...queries: SpecificQuery[]): QueryResult { +export function executeQueriesOfSameType(data: BasicQueryData, ...queries: readonly SpecificQuery[]): QueryResult { guard(queries.length > 0, 'At least one query must be provided'); /* every query must have the same type */ guard(queries.every(q => q.type === queries[0].type), 'All queries must have the same type'); const executor = SupportedQueries[queries[0].type]; guard(executor !== undefined, `Unsupported query type: ${queries[0].type}`); - return executor(graph, queries) as QueryResult; + return executor(data, queries) as QueryResult; } function isVirtualQuery< @@ -84,12 +90,12 @@ export type QueryResultsWithoutMeta = OmitFromValues = VirtualCompoundConstraint ->(graph: DataflowGraph, queries: readonly (QueryArgumentsWithType | VirtualQueryArgumentsWithType)[]): QueryResults { +>(data: BasicQueryData, queries: readonly (QueryArgumentsWithType | VirtualQueryArgumentsWithType)[]): QueryResults { const now = Date.now(); const grouped = groupQueriesByType(queries); const results = {} as Writable>; for(const type of Object.keys(grouped) as Base[]) { - results[type] = executeQueriesOfSameType(graph, ...grouped[type]) as QueryResults[Base]; + results[type] = executeQueriesOfSameType(data, ...grouped[type]) as QueryResults[Base]; } results['.meta'] = { timing: Date.now() - now diff --git a/src/queries/two-layer-collector.ts b/src/queries/two-layer-collector.ts new file mode 100644 index 0000000000..20e82a6314 --- /dev/null +++ b/src/queries/two-layer-collector.ts @@ -0,0 +1,40 @@ +export class TwoLayerCollector { + readonly store = new Map>(); + + public add(layer1: Layer1, layer2: Layer2, value: Values) { + let layer2Map = this.store.get(layer1); + if(layer2Map === undefined) { + layer2Map = new Map(); + 
this.store.set(layer1, layer2Map); + } + let values = layer2Map.get(layer2); + if(values === undefined) { + values = []; + layer2Map.set(layer2, values); + } + values.push(value); + } + + public get(layer1: Layer1, layer2: Layer2): Values[] | undefined { + return this.store.get(layer1)?.get(layer2); + } + + public outerKeys(): Iterable { + return this.store.keys(); + } + + public innerKeys(layer1: Layer1): Iterable { + return this.store.get(layer1)?.keys() ?? []; + } + + public asciiSummary() { + let result = ''; + for(const [layer1, layer2Map] of this.store) { + result += `${JSON.stringify(layer1)}\n`; + for(const [layer2, values] of layer2Map) { + result += ` ╰ ${JSON.stringify(layer2)}: ${JSON.stringify(values)}\n`; + } + } + return result; + } +} diff --git a/src/util/cfg/cfg.ts b/src/util/cfg/cfg.ts index 2910d7f8d8..e6d800673a 100644 --- a/src/util/cfg/cfg.ts +++ b/src/util/cfg/cfg.ts @@ -164,8 +164,6 @@ export function extractCFG(ast: NormalizedAst): Co return foldAst(ast.ast, cfgFolds); } - - function cfgLeaf(type: CfgVertexType): (leaf: RNodeWithParent) => ControlFlowInformation { return (leaf: RNodeWithParent) => { const graph = new ControlFlowGraph(); diff --git a/src/util/objects.ts b/src/util/objects.ts index 0b0320e685..2c2cf6988b 100644 --- a/src/util/objects.ts +++ b/src/util/objects.ts @@ -74,3 +74,19 @@ function assertSameType(base: unknown, addon: unknown): void { throw new Error(`cannot merge different types! ${typeof base} (${JSON.stringify(base, jsonReplacer)}) !== ${typeof addon} (${JSON.stringify(addon, jsonReplacer)})`); } } + +type Defined = Exclude; +type DefinedRecord = { + [K in keyof T as T[K] extends undefined ? 
never : K]: Defined; +} + +/** from a record take only the keys that are not undefined */ +export function compactRecord>(record: T): DefinedRecord { + const result: Partial> = {}; + for(const key of Object.keys(record)) { + if(record[key] !== undefined) { + result[key] = record[key]; + } + } + return result as DefinedRecord; +} diff --git a/test/functionality/_helper/query.ts b/test/functionality/_helper/query.ts index 0aaf48c8bb..3bf671f874 100644 --- a/test/functionality/_helper/query.ts +++ b/test/functionality/_helper/query.ts @@ -56,8 +56,7 @@ export function assertQuery< getId: deterministicCountingIdGenerator(0) }).allRemainingSteps(); - const graph = info.dataflow.graph; - const result = executeQueries(graph, queries); + const result = executeQueries({ graph: info.dataflow.graph, ast: info.normalize }, queries); // TODO: demote to logger console.log(`total query time: ${result['.meta'].timing.toFixed(0)}ms (~1ms accuracy)`); diff --git a/test/functionality/dataflow/query/call-context-query-tests.ts b/test/functionality/dataflow/query/call-context-query-tests.ts index 5dfd959ec8..2c4673d233 100644 --- a/test/functionality/dataflow/query/call-context-query-tests.ts +++ b/test/functionality/dataflow/query/call-context-query-tests.ts @@ -34,7 +34,7 @@ function baseResult(kinds: CallContextQueryKindResult): QueryResultsWithoutMeta< } /** simple result shortcut */ -function r(results: readonly CallContextQuerySubKindResult[], kind = 'test-kind', subkind = 'test-subkind'): QueryResultsWithoutMeta { +function r(results: CallContextQuerySubKindResult[], kind = 'test-kind', subkind = 'test-subkind'): QueryResultsWithoutMeta { return baseResult({ [kind]: { subkinds: { From dcfaf4a20cf753b1b22dd350266e6367fbd4b7b4 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sun, 22 Sep 2024 10:00:52 +0200 Subject: [PATCH 16/41] feat: link-to basics --- .../call-context-query-executor.ts | 28 +++-- src/queries/two-layer-collector.ts | 1 + src/util/cfg/cfg.ts | 3 + 
src/util/cfg/visitor.ts | 106 +++++------------- .../query/call-context-query-tests.ts | 5 +- 5 files changed, 54 insertions(+), 89 deletions(-) diff --git a/src/queries/call-context-query/call-context-query-executor.ts b/src/queries/call-context-query/call-context-query-executor.ts index 9cbe1ca65b..8d283f6d94 100644 --- a/src/queries/call-context-query/call-context-query-executor.ts +++ b/src/queries/call-context-query/call-context-query-executor.ts @@ -13,11 +13,12 @@ import { assertUnreachable } from '../../util/assert'; import { edgeIncludesType, EdgeType } from '../../dataflow/graph/edge'; import { resolveByName } from '../../dataflow/environments/resolve-by-name'; import { BuiltIn } from '../../dataflow/environments/built-in'; -import type { ControlFlowInformation } from '../../util/cfg/cfg'; -import { extractCFG } from '../../util/cfg/cfg'; +import type { ControlFlowGraph } from '../../util/cfg/cfg'; +import { extractCFG } from '../../util/cfg/cfg'; import { TwoLayerCollector } from '../two-layer-collector'; import type { BasicQueryData } from '../query'; import { compactRecord } from '../../util/objects'; +import { visitInReverseOrder } from '../../util/cfg/visitor'; function satisfiesCallTargets(id: NodeId, graph: DataflowGraph, callTarget: CallTargets): NodeId[] | 'no' { const callVertex = graph.get(id); @@ -124,9 +125,23 @@ function promoteQueryCallNames(queries: readonly CallContextQuery[]): { promoted return { promotedQueries, requiresCfg }; } -function identifyLinkToRelation(cfg: ControlFlowInformation): NodeId[] { - /* TODO: */ - return []; +function identifyLinkToLastCallRelation(from: NodeId, cfg: ControlFlowGraph, graph: DataflowGraph, linkTo: RegExp): NodeId[] { + const found: NodeId[] = []; + visitInReverseOrder(cfg, from, node => { + /* we ignore the start id as it cannot be the last call */ + if(node === from) { + return; + } + const vertex = graph.getVertex(node); + if(vertex === undefined || vertex.tag !== VertexType.FunctionCall) { + 
return; + } + if(linkTo.test(vertex.name)) { + found.push(node); + return true; + } + }); + return found; } /** @@ -170,7 +185,7 @@ export function executeCallContextQueries({ graph, ast }: BasicQueryData, querie let linkedIds: NodeId[] | undefined = undefined; if(cfg && isSubCallQuery(query)) { /* if we have a linkTo query, we have to find the last call */ - const lastCall = identifyLinkToRelation(cfg); + const lastCall = identifyLinkToLastCallRelation(nodeId, cfg.graph, graph, query.linkTo.callName); if(lastCall) { linkedIds = lastCall; } @@ -180,7 +195,6 @@ export function executeCallContextQueries({ graph, ast }: BasicQueryData, querie } } - /* TODO: link to */ console.log(initialIdCollector.asciiSummary()); return { diff --git a/src/queries/two-layer-collector.ts b/src/queries/two-layer-collector.ts index 20e82a6314..1ad3608e50 100644 --- a/src/queries/two-layer-collector.ts +++ b/src/queries/two-layer-collector.ts @@ -1,3 +1,4 @@ +/** two layer map abstraction */ export class TwoLayerCollector { readonly store = new Map>(); diff --git a/src/util/cfg/cfg.ts b/src/util/cfg/cfg.ts index e6d800673a..ff6895fc1c 100644 --- a/src/util/cfg/cfg.ts +++ b/src/util/cfg/cfg.ts @@ -81,6 +81,9 @@ export class ControlFlowGraph { return this; } + outgoing(node: NodeId): ReadonlyMap | undefined { + return this.edgeInformation.get(node); + } rootVertexIds(): ReadonlySet { return this.rootVertices; diff --git a/src/util/cfg/visitor.ts b/src/util/cfg/visitor.ts index d1af6113d3..c5872be461 100644 --- a/src/util/cfg/visitor.ts +++ b/src/util/cfg/visitor.ts @@ -1,88 +1,32 @@ -import { guard } from '../assert'; -import type { CfgEdge, CfgVertex, ControlFlowInformation } from './cfg'; +import type { ControlFlowGraph } from './cfg'; import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id'; -export interface NodeVisitingContext { - parent: { - vertex: NodeId, - edge: CfgEdge - } | 'root', - cfg: ControlFlowInformation, - visited: Set, - /** contains the 
current vertex as well */ - siblings: NodeId[] -} - -interface PredecessorInformation { - source: NodeId, - edge: CfgEdge -} - -export type OnEnterVisitNode = (node: CfgVertex, context: NodeVisitingContext) => void - -class ControlFlowGraphExecutionTraceVisitor { - private readonly onEnter: OnEnterVisitNode; - - constructor(onEnter: OnEnterVisitNode) { - this.onEnter = onEnter; - } - - private visitSingle(node: CfgVertex, context: NodeVisitingContext): void { - if(context.visited.has(node.id)) { - return; +/** + * Visit all nodes reachable from the start node in the control flow graph, traversing the dependencies but ignoring cycles. + * @param graph - The control flow graph. + * @param startNode - The node to start the traversal from. + * @param visitor - The visitor function to call for each node, if you return true the traversal from this node will be stopped. + */ +export function visitInReverseOrder( + graph: ControlFlowGraph, + startNode: NodeId, + // eslint-disable-next-line @typescript-eslint/no-invalid-void-type -- void is used to indicate that the return value is ignored/we never stop + visitor: (node: NodeId) => boolean | void +): void { + const visited = new Set(); + const queue = [startNode]; + while(queue.length > 0) { + const current = queue.pop() as NodeId; + if(visited.has(current)) { + continue; } - // only visit a node if we have visited all of its successors - const successorEdges = context.cfg.graph.edges().get(node.id); - if(successorEdges) { - for(const [target,] of successorEdges) { - if(!context.visited.has(target)) { - return; - } - } + visited.add(current); + if(visitor(current)) { + continue; } - context.visited.add(node.id); - - this.onEnter(node, context); - - // find all ingoing edges - const predecessors = this.retrieveAllPredecessors(context, node); - const siblings = predecessors.map(p => p.source); - for(const predecessor of predecessors) { - const { source, edge } = predecessor; - const sourceVertex = 
context.cfg.graph.vertices().get(source); - guard(sourceVertex !== undefined, () => `Source vertex with id ${source} not found`); - this.visitSingle(sourceVertex, { - parent: { vertex: node.id, edge }, - cfg: context.cfg, - visited: context.visited, - siblings - }); + const incoming = graph.outgoing(current) ?? []; + for(const [from] of incoming) { + queue.push(from); } } - - private retrieveAllPredecessors(context: NodeVisitingContext, node: CfgVertex) { - const predecessors: PredecessorInformation[] = []; - for(const entry of context.cfg.graph.edges().entries()) { - const [source, targets] = entry; - const target = targets.get(node.id); - if(target) { - predecessors.push({ source, edge: target }); - } - } - return predecessors; - } - - visit(cfg: ControlFlowInformation): void { - const visited = new Set(); - for(const id of cfg.entryPoints) { - const node = cfg.graph.vertices().get(id); - guard(node !== undefined, `Node with id ${id} not present`); - this.visitSingle(node, { parent: 'root', cfg, siblings: [...cfg.entryPoints], visited }); - } - } - -} - -export function visitCfg(cfg: ControlFlowInformation, onVisit: OnEnterVisitNode): void { - return new ControlFlowGraphExecutionTraceVisitor(onVisit).visit(cfg); } diff --git a/test/functionality/dataflow/query/call-context-query-tests.ts b/test/functionality/dataflow/query/call-context-query-tests.ts index 2c4673d233..84006a34bf 100644 --- a/test/functionality/dataflow/query/call-context-query-tests.ts +++ b/test/functionality/dataflow/query/call-context-query-tests.ts @@ -80,7 +80,10 @@ describe('Call Context Query', withShell(shell => { testQuery('May be local or global (incl. 
global)', code, [q(/print/, { callTargets: CallTargets.MustIncludeGlobal })], r([{ id: 12, calls: [7, BuiltIn] }])); }); describe('Linked Calls', () => { - // TODO: with one finding its parent, and one that does not + testQuery('Link to Plot', 'plot(x)\nplot(x)\npoints(y)', [q(/points/, { linkTo: { type: 'link-to-last-call', callName: /plot/ } })], r([{ id: 11, linkedIds: [7] }])); + testQuery('Link to Self', 'plot(x)\nplot(y)', [q(/plot/, { linkTo: { type: 'link-to-last-call', callName: /plot/ } })], r([{ id: 3, linkedIds: [] }, { id: 7, linkedIds: [3] }])); + testQuery('Link to Meet', 'if(k) { plot(a) } else { plot(x) }\npoints(y)', [q(/points/, { linkTo: { type: 'link-to-last-call', callName: /plot/ } })], r([{ id: 19, linkedIds: [13, 6] }])); + testQuery('Link to Loop Closure ', 'for(i in v) { points(a); plots(b) }', [q(/points/, { linkTo: { type: 'link-to-last-call', callName: /plot/ } })], r([{ id: 7, linkedIds: [11] }])); }); describe('Multiple Kinds', () => { testQuery('Multiple Kinds', 'print(1); foo(2)', [q(/print/, { kind: 'print-kind' }), q(/foo/, { kind: 'foo-kind' })], baseResult({ From 9e0b41cbe769f1ff11484f02fb0bee1b614c229b Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sun, 22 Sep 2024 10:09:24 +0200 Subject: [PATCH 17/41] refactor: clean up current todo stack --- test/functionality/_helper/query.ts | 5 +++-- .../functionality/dataflow/query/call-context-query-tests.ts | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/test/functionality/_helper/query.ts b/test/functionality/_helper/query.ts index 3bf671f874..520f69a1cb 100644 --- a/test/functionality/_helper/query.ts +++ b/test/functionality/_helper/query.ts @@ -12,6 +12,7 @@ import type { VirtualQueryArgumentsWithType } from '../../../src/queries/virtual import type { TestLabel } from './label'; import { decorateLabelContext } from './label'; import type { VirtualCompoundConstraint } from '../../../src/queries/virtual-query/compound-query'; +import { log } from 
'../../../src/util/log'; function normalizeResults(result: QueryResults): QueryResultsWithoutMeta { @@ -58,8 +59,8 @@ export function assertQuery< const result = executeQueries({ graph: info.dataflow.graph, ast: info.normalize }, queries); - // TODO: demote to logger - console.log(`total query time: ${result['.meta'].timing.toFixed(0)}ms (~1ms accuracy)`); + log.info(`total query time: ${result['.meta'].timing.toFixed(0)}ms (~1ms accuracy)`); + const normalized = normalizeResults(result); /* expect them to be deeply equal */ diff --git a/test/functionality/dataflow/query/call-context-query-tests.ts b/test/functionality/dataflow/query/call-context-query-tests.ts index 84006a34bf..7957852579 100644 --- a/test/functionality/dataflow/query/call-context-query-tests.ts +++ b/test/functionality/dataflow/query/call-context-query-tests.ts @@ -45,6 +45,7 @@ function r(results: CallContextQuerySubKindResult[], kind = 'test-kind', subkind } // TODO: documentation +// TODO: add REPL and message describe('Call Context Query', withShell(shell => { function testQuery(name: string, code: string, query: readonly CallContextQuery[], expected: QueryResultsWithoutMeta) { assertQuery(label(name), shell, code, query, expected); From 93a4efe7a0184d87ce3e08d8c4836815143c1d57 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sun, 22 Sep 2024 11:21:39 +0200 Subject: [PATCH 18/41] wip: query repl --- src/cli/repl/commands/cfg.ts | 4 +- src/cli/repl/commands/commands.ts | 41 +++++++++---- src/cli/repl/commands/dataflow.ts | 2 +- src/cli/repl/commands/lineage.ts | 2 +- src/cli/repl/commands/normalize.ts | 4 +- src/cli/repl/commands/query.ts | 93 ++++++++++++++++++++++++++++++ 6 files changed, 130 insertions(+), 16 deletions(-) create mode 100644 src/cli/repl/commands/query.ts diff --git a/src/cli/repl/commands/cfg.ts b/src/cli/repl/commands/cfg.ts index 35df0b3847..8e67398783 100644 --- a/src/cli/repl/commands/cfg.ts +++ b/src/cli/repl/commands/cfg.ts @@ -27,8 +27,8 @@ export const 
controlflowCommand: ReplCommand = { }; export const controlflowStarCommand: ReplCommand = { - description: `Get a mermaid url of the control-flow graph of R code, start with '${fileProtocol}' to indicate a file`, - usageExample: ':controlflow', + description: 'Returns the URL to mermaid.live', + usageExample: ':controlflow*', aliases: [ 'cfg*', 'cf*' ], script: false, fn: async(output, shell, remainingLine) => { diff --git a/src/cli/repl/commands/commands.ts b/src/cli/repl/commands/commands.ts index 044b125f7c..a395d6b282 100644 --- a/src/cli/repl/commands/commands.ts +++ b/src/cli/repl/commands/commands.ts @@ -13,10 +13,15 @@ import { italic , bold } from '../../../util/ansi'; import { splitAtEscapeSensitive } from '../../../util/args'; import { guard } from '../../../util/assert'; import { scripts } from '../../common/scripts-info'; -import { getLineageCommand } from './lineage'; +import { lineageCommand } from './lineage'; +import { queryCommand, queryStarCommand } from './query'; -function printHelpForScript(script: [string, ReplCommand], f: OutputFormatter): string { - const base = ` ${bold(padCmd(':' + script[0]), f)}${script[1].description}`; +function printHelpForScript(script: [string, ReplCommand], f: OutputFormatter, starredVersion?: ReplCommand): string { + let base = ` ${bold(padCmd(':' + script[0] + (starredVersion ? '[*]' : '') + ), f)}${script[1].description}`; + if(starredVersion) { + base += ` (star: ${starredVersion.description})`; + } if(script[1].aliases.length === 0) { return base; } @@ -24,6 +29,20 @@ function printHelpForScript(script: [string, ReplCommand], f: OutputFormatter): return `${base} (alias${aliases.length > 1 ? 
'es' : ''}: ${aliases.map(a => bold(':' + a, f)).join(', ')})`; } +function printCommandHelp(formatter: OutputFormatter) { + const scriptHelp = []; + const cmds = commands(); + for(const c of Object.entries(cmds)) { + if(c[1].script || c[0].endsWith('*')) { + continue; + } + const starred = cmds[c[0] + '*']; + scriptHelp.push(printHelpForScript(c, formatter, starred)); + } + + return scriptHelp.sort().join('\n'); +} + export const helpCommand: ReplCommand = { description: 'Show help information', script: false, @@ -32,20 +51,20 @@ export const helpCommand: ReplCommand = { fn: output => { initCommandMapping(); output.stdout(` -If enabled, you can just enter R expressions which get evaluated right away: +If enabled ('--r-session-access'), you can just enter R expressions which get evaluated right away: ${rawPrompt} ${bold('1 + 1', output.formatter)} ${italic('[1] 2', output.formatter)} -Besides that, you can use the following commands. The scripts ${italic('can', output.formatter)} accept further arguments. There are the following basic commands: +Besides that, you can use the following commands. The scripts ${italic('can', output.formatter)} accept further arguments. In general, those ending with [*] may be called with and without the star. +There are the following basic commands: ${ - Array.from(Object.entries(commands())).filter(([, { script }]) => !script).map( - c => printHelpForScript(c, output.formatter)).join('\n') + printCommandHelp(output.formatter) } Furthermore, you can directly call the following scripts which accept arguments. If you are unsure, try to add ${italic('--help', output.formatter)} after the command. 
${ Array.from(Object.entries(commands())).filter(([, { script }]) => script).map( - ([command, { description }]) => ` ${bold(padCmd(':' + command), output.formatter)}${description}`).join('\n') + ([command, { description }]) => ` ${bold(padCmd(':' + command), output.formatter)}${description}`).sort().join('\n') } You can combine commands by separating them with a semicolon ${bold(';',output.formatter)}. @@ -68,7 +87,9 @@ const _commands: Record = { 'dataflow*': dataflowStarCommand, 'controlflow': controlflowCommand, 'controlflow*': controlflowStarCommand, - 'lineage': getLineageCommand + 'lineage': lineageCommand, + 'query': queryCommand, + 'query*': queryStarCommand }; let commandsInitialized = false; @@ -167,7 +188,7 @@ export function asOptionName(argument: string): string{ let _longestCommandName: number | undefined = undefined; export function longestCommandName(): number { if(_longestCommandName === undefined) { - _longestCommandName = Array.from(Object.keys(commands()), k => k.length).reduce((p, n) => Math.max(p, n), 0); + _longestCommandName = Array.from(Object.keys(commands()), k => k.endsWith('*') ? 
k.length + 3 : k.length).reduce((p, n) => Math.max(p, n), 0); } return _longestCommandName; } diff --git a/src/cli/repl/commands/dataflow.ts b/src/cli/repl/commands/dataflow.ts index 576388b9e9..d09441bf60 100644 --- a/src/cli/repl/commands/dataflow.ts +++ b/src/cli/repl/commands/dataflow.ts @@ -25,7 +25,7 @@ export const dataflowCommand: ReplCommand = { }; export const dataflowStarCommand: ReplCommand = { - description: `Get a mermaid url of the dataflow graph of R code, start with '${fileProtocol}' to indicate a file`, + description: 'Returns the URL to mermaid.live', usageExample: ':dataflow*', aliases: [ 'd*', 'df*' ], script: false, diff --git a/src/cli/repl/commands/lineage.ts b/src/cli/repl/commands/lineage.ts index c0aa5e8ad2..18a0e92441 100644 --- a/src/cli/repl/commands/lineage.ts +++ b/src/cli/repl/commands/lineage.ts @@ -65,7 +65,7 @@ export function getLineage(criterion: SingleSlicingCriterion, { idMap } : Normal return result; } -export const getLineageCommand: ReplCommand = { +export const lineageCommand: ReplCommand = { description: 'Get the lineage of an R object', usageExample: ':lineage', aliases: ['lin'], diff --git a/src/cli/repl/commands/normalize.ts b/src/cli/repl/commands/normalize.ts index 2ef404459f..921a13f003 100644 --- a/src/cli/repl/commands/normalize.ts +++ b/src/cli/repl/commands/normalize.ts @@ -25,8 +25,8 @@ export const normalizeCommand: ReplCommand = { }; export const normalizeStarCommand: ReplCommand = { - description: `Get a mermaid url of the normalized AST of R code, start with '${fileProtocol}' to indicate a file`, - usageExample: ':normalize', + description: 'Returns the URL to mermaid.live', + usageExample: ':normalize*', aliases: [ 'n*' ], script: false, fn: async(output, shell, remainingLine) => { diff --git a/src/cli/repl/commands/query.ts b/src/cli/repl/commands/query.ts new file mode 100644 index 0000000000..704d069b6b --- /dev/null +++ b/src/cli/repl/commands/query.ts @@ -0,0 +1,93 @@ +import type { RShell } from 
'../../../r-bridge/shell'; +import { PipelineExecutor } from '../../../core/pipeline-executor'; +import { DEFAULT_DATAFLOW_PIPELINE } from '../../../core/steps/pipeline/default-pipelines'; +import { fileProtocol, requestFromInput } from '../../../r-bridge/retriever'; +import type { ReplCommand, ReplOutput } from './main'; +import { splitAtEscapeSensitive } from '../../../util/args'; +import type { OutputFormatter } from '../../../util/ansi'; +import { italic , bold } from '../../../util/ansi'; + +async function getDataflow(shell: RShell, remainingLine: string) { + return await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { + shell, + request: requestFromInput(remainingLine.trim()) + }).allRemainingSteps(); +} + +interface QueryPattern { + readonly description: string; + readonly pattern: string; + /* TODO: result */ + readonly call: (formatter: OutputFormatter, args: readonly string[]) => Promise; +} + +function trimWithIndent(str: string, length: number, indent: number): string { + // split str into lines of length max length, then join them with the given indent + const lines = []; + const effictveLength = Math.max(Math.min(length , length - indent), 50); + for(let i = 0; i < str.length; i += effictveLength) { + lines.push(str.slice(i, i + effictveLength)); + } + return lines.join('\n' + ' '.repeat(indent)); +} + +const AvailableQueries = { + 'help': { + description: 'Get help on the available queries', + pattern: ':query help', + // eslint-disable-next-line @typescript-eslint/require-await + call: async f => { + console.log('Available queries:'); + for(const [query, { description, pattern }] of Object.entries(AvailableQueries)) { + console.log(`- [${bold(query, f)}] ${italic(pattern, f)}\n${' '.repeat(query.length + 5)}${trimWithIndent(description, 120, query.length + 5)}`); + } + } + }, + 'call': { + description: 'Call-Context Query (retrieve all calls matching your criteria). The criteria is to be a regex of the callName you are interested in. 
', + pattern: ':query ', + // eslint-disable-next-line @typescript-eslint/require-await + call: async(f, args) => { + console.log('Call-Context Query:', args); + } + }, +} as const satisfies Record; + +async function processQueryArgs(line: string, shell: RShell, output: ReplOutput): Promise { + const args = splitAtEscapeSensitive(line); + const query = args.shift(); + + if(!query) { + output.stderr('No query provided, use \':query help\' to get more information.'); + return; + } + + const queryPattern = AvailableQueries[query as keyof typeof AvailableQueries]; + if(!queryPattern) { + output.stderr(`Unknown query: ${query}, use ':query help' to get more information.`); + return; + } + + return await queryPattern.call(output.formatter, args); +} + +export const queryCommand: ReplCommand = { + description: `Query the given R code, start with '${fileProtocol}' to indicate a file. Use the 'help' query to get more information!`, + usageExample: ':query ', + aliases: [], + script: false, + fn: async(output, shell, remainingLine) => { + await processQueryArgs(remainingLine, shell, output); + } +}; + +export const queryStarCommand: ReplCommand = { + description: 'Similar to query, but returns the output in json format.', + usageExample: ':query* ', + aliases: [ ], + script: false, + fn: async(output, shell, remainingLine) => { + /* TODO */ + await processQueryArgs(remainingLine, shell, output); + } +}; From be9577222901772e5870f3186954500ae11572df Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sun, 22 Sep 2024 11:22:36 +0200 Subject: [PATCH 19/41] refactor: update repl commands file names --- src/cli/common/scripts-info.ts | 2 +- src/cli/flowr.ts | 4 ++-- src/cli/repl/commands/{cfg.ts => repl-cfg.ts} | 2 +- .../{commands.ts => repl-commands.ts} | 20 +++++++++---------- .../{dataflow.ts => repl-dataflow.ts} | 2 +- .../commands/{execute.ts => repl-execute.ts} | 2 +- .../commands/{lineage.ts => repl-lineage.ts} | 2 +- .../repl/commands/{main.ts => repl-main.ts} | 0 
.../{normalize.ts => repl-normalize.ts} | 2 +- .../repl/commands/{parse.ts => repl-parse.ts} | 6 +++--- .../repl/commands/{query.ts => repl-query.ts} | 2 +- .../repl/commands/{quit.ts => repl-quit.ts} | 2 +- .../commands/{version.ts => repl-version.ts} | 2 +- src/cli/repl/core.ts | 18 ++++++++--------- src/cli/repl/print-version.ts | 2 +- src/cli/repl/server/connection.ts | 2 +- src/cli/repl/server/messages/hello.ts | 2 +- src/cli/repl/server/server.ts | 4 ++-- test/functionality/cli/server.spec.ts | 2 +- test/functionality/lineage/lineage.spec.ts | 2 +- 20 files changed, 40 insertions(+), 40 deletions(-) rename src/cli/repl/commands/{cfg.ts => repl-cfg.ts} (96%) rename src/cli/repl/commands/{commands.ts => repl-commands.ts} (91%) rename src/cli/repl/commands/{dataflow.ts => repl-dataflow.ts} (96%) rename src/cli/repl/commands/{execute.ts => repl-execute.ts} (93%) rename src/cli/repl/commands/{lineage.ts => repl-lineage.ts} (98%) rename src/cli/repl/commands/{main.ts => repl-main.ts} (100%) rename src/cli/repl/commands/{normalize.ts => repl-normalize.ts} (96%) rename src/cli/repl/commands/{parse.ts => repl-parse.ts} (98%) rename src/cli/repl/commands/{query.ts => repl-query.ts} (98%) rename src/cli/repl/commands/{quit.ts => repl-quit.ts} (84%) rename src/cli/repl/commands/{version.ts => repl-version.ts} (96%) diff --git a/src/cli/common/scripts-info.ts b/src/cli/common/scripts-info.ts index 5829cab01d..557f953a3f 100644 --- a/src/cli/common/scripts-info.ts +++ b/src/cli/common/scripts-info.ts @@ -12,7 +12,7 @@ import { summarizerOptions } from './options'; import type { MergeableRecord } from '../../util/objects'; -import { asOptionName } from '../repl/commands/commands'; +import { asOptionName } from '../repl/commands/repl-commands'; interface BaseScriptInformation extends MergeableRecord { diff --git a/src/cli/flowr.ts b/src/cli/flowr.ts index e82773ba1d..921ba40bc2 100644 --- a/src/cli/flowr.ts +++ b/src/cli/flowr.ts @@ -23,9 +23,9 @@ import { scripts } from 
'./common/scripts-info'; import type { RShellOptions } from '../r-bridge/shell'; import { RShell, RShellReviveOptions } from '../r-bridge/shell'; import { waitOnScript } from './repl/execute'; -import { standardReplOutput } from './repl/commands/main'; +import { standardReplOutput } from './repl/commands/repl-main'; import { repl, replProcessAnswer } from './repl/core'; -import { printVersionInformation } from './repl/commands/version'; +import { printVersionInformation } from './repl/commands/repl-version'; import { printVersionRepl } from './repl/print-version'; let _scriptsText: string | undefined; diff --git a/src/cli/repl/commands/cfg.ts b/src/cli/repl/commands/repl-cfg.ts similarity index 96% rename from src/cli/repl/commands/cfg.ts rename to src/cli/repl/commands/repl-cfg.ts index 8e67398783..bec878190e 100644 --- a/src/cli/repl/commands/cfg.ts +++ b/src/cli/repl/commands/repl-cfg.ts @@ -1,4 +1,4 @@ -import type { ReplCommand } from './main'; +import type { ReplCommand } from './repl-main'; import { PipelineExecutor } from '../../../core/pipeline-executor'; import { extractCFG } from '../../../util/cfg/cfg'; import type { RShell } from '../../../r-bridge/shell'; diff --git a/src/cli/repl/commands/commands.ts b/src/cli/repl/commands/repl-commands.ts similarity index 91% rename from src/cli/repl/commands/commands.ts rename to src/cli/repl/commands/repl-commands.ts index a395d6b282..4a6cc529ef 100644 --- a/src/cli/repl/commands/commands.ts +++ b/src/cli/repl/commands/repl-commands.ts @@ -1,20 +1,20 @@ -import { quitCommand } from './quit'; +import { quitCommand } from './repl-quit'; import { stdioCaptureProcessor, waitOnScript } from '../execute'; -import type { ReplCommand } from './main'; +import type { ReplCommand } from './repl-main'; import { rawPrompt } from '../prompt'; -import { versionCommand } from './version'; -import { parseCommand } from './parse'; -import { executeCommand } from './execute'; -import { normalizeCommand, normalizeStarCommand } from 
'./normalize'; -import { dataflowCommand, dataflowStarCommand } from './dataflow'; -import { controlflowCommand, controlflowStarCommand } from './cfg'; +import { versionCommand } from './repl-version'; +import { parseCommand } from './repl-parse'; +import { executeCommand } from './repl-execute'; +import { normalizeCommand, normalizeStarCommand } from './repl-normalize'; +import { dataflowCommand, dataflowStarCommand } from './repl-dataflow'; +import { controlflowCommand, controlflowStarCommand } from './repl-cfg'; import type { OutputFormatter } from '../../../util/ansi'; import { italic , bold } from '../../../util/ansi'; import { splitAtEscapeSensitive } from '../../../util/args'; import { guard } from '../../../util/assert'; import { scripts } from '../../common/scripts-info'; -import { lineageCommand } from './lineage'; -import { queryCommand, queryStarCommand } from './query'; +import { lineageCommand } from './repl-lineage'; +import { queryCommand, queryStarCommand } from './repl-query'; function printHelpForScript(script: [string, ReplCommand], f: OutputFormatter, starredVersion?: ReplCommand): string { let base = ` ${bold(padCmd(':' + script[0] + (starredVersion ? 
'[*]' : '') diff --git a/src/cli/repl/commands/dataflow.ts b/src/cli/repl/commands/repl-dataflow.ts similarity index 96% rename from src/cli/repl/commands/dataflow.ts rename to src/cli/repl/commands/repl-dataflow.ts index d09441bf60..7099c00d93 100644 --- a/src/cli/repl/commands/dataflow.ts +++ b/src/cli/repl/commands/repl-dataflow.ts @@ -1,4 +1,4 @@ -import type { ReplCommand } from './main'; +import type { ReplCommand } from './repl-main'; import { PipelineExecutor } from '../../../core/pipeline-executor'; import { DEFAULT_DATAFLOW_PIPELINE } from '../../../core/steps/pipeline/default-pipelines'; import type { RShell } from '../../../r-bridge/shell'; diff --git a/src/cli/repl/commands/execute.ts b/src/cli/repl/commands/repl-execute.ts similarity index 93% rename from src/cli/repl/commands/execute.ts rename to src/cli/repl/commands/repl-execute.ts index 6536e46476..3fdadc949c 100644 --- a/src/cli/repl/commands/execute.ts +++ b/src/cli/repl/commands/repl-execute.ts @@ -1,4 +1,4 @@ -import type { ReplCommand, ReplOutput } from './main'; +import type { ReplCommand, ReplOutput } from './repl-main'; import { italic } from '../../../util/ansi'; import type { RShell } from '../../../r-bridge/shell'; diff --git a/src/cli/repl/commands/lineage.ts b/src/cli/repl/commands/repl-lineage.ts similarity index 98% rename from src/cli/repl/commands/lineage.ts rename to src/cli/repl/commands/repl-lineage.ts index 18a0e92441..43d8313815 100644 --- a/src/cli/repl/commands/lineage.ts +++ b/src/cli/repl/commands/repl-lineage.ts @@ -1,4 +1,4 @@ -import type { ReplCommand } from './main'; +import type { ReplCommand } from './repl-main'; import { PipelineExecutor } from '../../../core/pipeline-executor'; import { DEFAULT_DATAFLOW_PIPELINE } from '../../../core/steps/pipeline/default-pipelines'; import type { RShell } from '../../../r-bridge/shell'; diff --git a/src/cli/repl/commands/main.ts b/src/cli/repl/commands/repl-main.ts similarity index 100% rename from src/cli/repl/commands/main.ts 
rename to src/cli/repl/commands/repl-main.ts diff --git a/src/cli/repl/commands/normalize.ts b/src/cli/repl/commands/repl-normalize.ts similarity index 96% rename from src/cli/repl/commands/normalize.ts rename to src/cli/repl/commands/repl-normalize.ts index 921a13f003..9a46373a13 100644 --- a/src/cli/repl/commands/normalize.ts +++ b/src/cli/repl/commands/repl-normalize.ts @@ -1,4 +1,4 @@ -import type { ReplCommand } from './main'; +import type { ReplCommand } from './repl-main'; import { PipelineExecutor } from '../../../core/pipeline-executor'; import { DEFAULT_NORMALIZE_PIPELINE } from '../../../core/steps/pipeline/default-pipelines'; import type { RShell } from '../../../r-bridge/shell'; diff --git a/src/cli/repl/commands/parse.ts b/src/cli/repl/commands/repl-parse.ts similarity index 98% rename from src/cli/repl/commands/parse.ts rename to src/cli/repl/commands/repl-parse.ts index faa9425cc6..0a42d6d198 100644 --- a/src/cli/repl/commands/parse.ts +++ b/src/cli/repl/commands/repl-parse.ts @@ -1,4 +1,4 @@ -import type { ReplCommand } from './main'; +import type { ReplCommand } from './repl-main'; import type { OutputFormatter } from '../../../util/ansi'; import { FontStyles } from '../../../util/ansi'; import { PipelineExecutor } from '../../../core/pipeline-executor'; @@ -19,7 +19,7 @@ type DepthList = { depth: number, node: XmlBasedJson, leaf: boolean }[] function toDepthMap(xml: XmlBasedJson): DepthList { const root = getKeyGuarded(xml, RawRType.ExpressionList); - + const visit: { depth: number, node: XmlBasedJson }[] = [ { depth: 0, node: root } ]; const result: DepthList = []; @@ -30,7 +30,7 @@ function toDepthMap(xml: XmlBasedJson): DepthList { } const children = current.node[childrenKey] as unknown as XmlBasedJson[] | undefined ?? 
[]; - + result.push({ ...current, leaf: children.length === 0 }); children.reverse(); diff --git a/src/cli/repl/commands/query.ts b/src/cli/repl/commands/repl-query.ts similarity index 98% rename from src/cli/repl/commands/query.ts rename to src/cli/repl/commands/repl-query.ts index 704d069b6b..51452393e1 100644 --- a/src/cli/repl/commands/query.ts +++ b/src/cli/repl/commands/repl-query.ts @@ -2,7 +2,7 @@ import type { RShell } from '../../../r-bridge/shell'; import { PipelineExecutor } from '../../../core/pipeline-executor'; import { DEFAULT_DATAFLOW_PIPELINE } from '../../../core/steps/pipeline/default-pipelines'; import { fileProtocol, requestFromInput } from '../../../r-bridge/retriever'; -import type { ReplCommand, ReplOutput } from './main'; +import type { ReplCommand, ReplOutput } from './repl-main'; import { splitAtEscapeSensitive } from '../../../util/args'; import type { OutputFormatter } from '../../../util/ansi'; import { italic , bold } from '../../../util/ansi'; diff --git a/src/cli/repl/commands/quit.ts b/src/cli/repl/commands/repl-quit.ts similarity index 84% rename from src/cli/repl/commands/quit.ts rename to src/cli/repl/commands/repl-quit.ts index 2b17bcb965..aff414a339 100644 --- a/src/cli/repl/commands/quit.ts +++ b/src/cli/repl/commands/repl-quit.ts @@ -1,4 +1,4 @@ -import type { ReplCommand } from './main'; +import type { ReplCommand } from './repl-main'; import { log } from '../../../util/log'; export const quitCommand: ReplCommand = { diff --git a/src/cli/repl/commands/version.ts b/src/cli/repl/commands/repl-version.ts similarity index 96% rename from src/cli/repl/commands/version.ts rename to src/cli/repl/commands/repl-version.ts index 7494c4bac5..d67e4787f1 100644 --- a/src/cli/repl/commands/version.ts +++ b/src/cli/repl/commands/repl-version.ts @@ -1,4 +1,4 @@ -import type { ReplCommand, ReplOutput } from './main'; +import type { ReplCommand, ReplOutput } from './repl-main'; import { flowrVersion } from '../../../util/version'; import { 
guard } from '../../../util/assert'; import type { RShell } from '../../../r-bridge/shell'; diff --git a/src/cli/repl/core.ts b/src/cli/repl/core.ts index ee40efd4f3..892978edb4 100644 --- a/src/cli/repl/core.ts +++ b/src/cli/repl/core.ts @@ -5,17 +5,17 @@ */ import { prompt } from './prompt'; import * as readline from 'readline'; -import { executeRShellCommand } from './commands/execute'; +import { executeRShellCommand } from './commands/repl-execute'; import os from 'os'; import path from 'path'; import fs from 'fs'; import { splitAtEscapeSensitive } from '../../util/args'; import { ColorEffect, Colors, FontStyles } from '../../util/ansi'; -import { getCommand, getCommandNames } from './commands/commands'; +import { getCommand, getCommandNames } from './commands/repl-commands'; import { getValidOptionsForCompletion, scripts } from '../common/scripts-info'; import { fileProtocol } from '../../r-bridge/retriever'; -import type { ReplOutput } from './commands/main'; -import { standardReplOutput } from './commands/main'; +import type { ReplOutput } from './commands/repl-main'; +import { standardReplOutput } from './commands/repl-main'; import { RShell, RShellReviveOptions } from '../../r-bridge/shell'; import type { MergeableRecord } from '../../util/objects'; @@ -118,10 +118,10 @@ export async function replProcessAnswer(output: ReplOutput, expr: string, shell: export interface FlowrReplOptions extends MergeableRecord { /** The shell to use, if you do not pass one it will automatically create a new one with the `revive` option set to 'always'. */ readonly shell?: RShell - /** + /** * A potentially customized readline interface to be used for the repl to *read* from the user, we write the output with the {@link ReplOutput | `output` } interface. * If you want to provide a custom one but use the same `completer`, refer to {@link replCompleter}. - * For the default arguments, see {@link DEFAULT_REPL_READLINE_CONFIGURATION}. 
+ * For the default arguments, see {@link DEFAULT_REPL_READLINE_CONFIGURATION}. */ readonly rl?: readline.Interface /** Defines two methods that every function in the repl uses to output its data. */ @@ -145,9 +145,9 @@ export interface FlowrReplOptions extends MergeableRecord { * */ export async function repl({ - shell = new RShell({ revive: RShellReviveOptions.Always }), - rl = readline.createInterface(DEFAULT_REPL_READLINE_CONFIGURATION), - output = standardReplOutput, + shell = new RShell({ revive: RShellReviveOptions.Always }), + rl = readline.createInterface(DEFAULT_REPL_READLINE_CONFIGURATION), + output = standardReplOutput, historyFile = defaultHistoryFile, allowRSessionAccess = false }: FlowrReplOptions) { diff --git a/src/cli/repl/print-version.ts b/src/cli/repl/print-version.ts index 6d4fb04047..d606bb0772 100644 --- a/src/cli/repl/print-version.ts +++ b/src/cli/repl/print-version.ts @@ -1,4 +1,4 @@ -import { retrieveVersionInformation } from './commands/version'; +import { retrieveVersionInformation } from './commands/repl-version'; import type { RShell } from '../../r-bridge/shell'; export async function printVersionRepl(shell: RShell): Promise { diff --git a/src/cli/repl/server/connection.ts b/src/cli/repl/server/connection.ts index e09cbbb4cd..9b41a073d0 100644 --- a/src/cli/repl/server/connection.ts +++ b/src/cli/repl/server/connection.ts @@ -36,7 +36,7 @@ import type { RParseRequests } from '../../../r-bridge/retriever'; import { makeMagicCommentHandler } from '../../../reconstruct/auto-select/magic-comments'; import type { LineageRequestMessage, LineageResponseMessage } from './messages/lineage'; import { requestLineageMessage } from './messages/lineage'; -import { getLineage } from '../commands/lineage'; +import { getLineage } from '../commands/repl-lineage'; import { guard } from '../../../util/assert'; import { doNotAutoSelect } from '../../../reconstruct/auto-select/auto-select-defaults'; diff --git a/src/cli/repl/server/messages/hello.ts 
b/src/cli/repl/server/messages/hello.ts index 139041b2c7..5dbf7c4726 100644 --- a/src/cli/repl/server/messages/hello.ts +++ b/src/cli/repl/server/messages/hello.ts @@ -1,4 +1,4 @@ -import type { VersionInformation } from '../../commands/version'; +import type { VersionInformation } from '../../commands/repl-version'; import type { IdMessageBase } from './messages'; /** diff --git a/src/cli/repl/server/server.ts b/src/cli/repl/server/server.ts index 2d68abee33..d7a193ec0c 100644 --- a/src/cli/repl/server/server.ts +++ b/src/cli/repl/server/server.ts @@ -1,5 +1,5 @@ -import type { VersionInformation } from '../commands/version'; -import { retrieveVersionInformation } from '../commands/version'; +import type { VersionInformation } from '../commands/repl-version'; +import { retrieveVersionInformation } from '../commands/repl-version'; import { FlowRServerConnection } from './connection'; import { getUnnamedSocketName, sendMessage } from './send'; import type { FlowrHelloResponseMessage } from './messages/hello'; diff --git a/test/functionality/cli/server.spec.ts b/test/functionality/cli/server.spec.ts index a3b17b6159..848f5fcd9e 100644 --- a/test/functionality/cli/server.spec.ts +++ b/test/functionality/cli/server.spec.ts @@ -2,7 +2,7 @@ import { assert } from 'chai'; import { withShell } from '../_helper/shell'; import { fakeSend, withSocket } from '../_helper/net'; import type { FlowrHelloResponseMessage } from '../../../src/cli/repl/server/messages/hello'; -import { retrieveVersionInformation } from '../../../src/cli/repl/commands/version'; +import { retrieveVersionInformation } from '../../../src/cli/repl/commands/repl-version'; import type { ExecuteEndMessage, ExecuteIntermediateResponseMessage, diff --git a/test/functionality/lineage/lineage.spec.ts b/test/functionality/lineage/lineage.spec.ts index 5bbd6a8a5d..158b8178ba 100644 --- a/test/functionality/lineage/lineage.spec.ts +++ b/test/functionality/lineage/lineage.spec.ts @@ -3,7 +3,7 @@ import { 
PipelineExecutor } from '../../../src/core/pipeline-executor'; import { DEFAULT_DATAFLOW_PIPELINE } from '../../../src/core/steps/pipeline/default-pipelines'; import { requestFromInput } from '../../../src/r-bridge/retriever'; import type { SingleSlicingCriterion } from '../../../src/slicing/criterion/parse'; -import { getLineage } from '../../../src/cli/repl/commands/lineage'; +import { getLineage } from '../../../src/cli/repl/commands/repl-lineage'; import type { TestLabel } from '../_helper/label'; import { decorateLabelContext, label } from '../_helper/label'; import type { NodeId } from '../../../src/r-bridge/lang-4.x/ast/model/processing/node-id'; From dbdc874be401791f6c47a1a510c2449e37560fae Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sun, 22 Sep 2024 12:54:16 +0200 Subject: [PATCH 20/41] feat(query): repl support --- src/cli/repl/commands/repl-query.ts | 175 +++++++++++++----- .../call-context-query-executor.ts | 6 +- .../call-context-query-format.ts | 24 ++- src/queries/query.ts | 5 + src/queries/two-layer-collector.ts | 11 -- src/util/args.ts | 4 +- src/util/schema.ts | 66 +++++++ 7 files changed, 221 insertions(+), 70 deletions(-) create mode 100644 src/util/schema.ts diff --git a/src/cli/repl/commands/repl-query.ts b/src/cli/repl/commands/repl-query.ts index 51452393e1..2c96d5a4eb 100644 --- a/src/cli/repl/commands/repl-query.ts +++ b/src/cli/repl/commands/repl-query.ts @@ -5,7 +5,16 @@ import { fileProtocol, requestFromInput } from '../../../r-bridge/retriever'; import type { ReplCommand, ReplOutput } from './repl-main'; import { splitAtEscapeSensitive } from '../../../util/args'; import type { OutputFormatter } from '../../../util/ansi'; -import { italic , bold } from '../../../util/ansi'; +import { bold, italic } from '../../../util/ansi'; + +import type { CallContextQuerySubKindResult } from '../../../queries/call-context-query/call-context-query-format'; +import { CallContextQuerySchema } from 
'../../../queries/call-context-query/call-context-query-format'; +import { describeSchema } from '../../../util/schema'; +import type { Query, QueryResults, SupportedQueryTypes } from '../../../queries/query'; +import { executeQueries, SupportedQueriesSchema } from '../../../queries/query'; +import type { PipelineOutput } from '../../../core/steps/pipeline/pipeline'; +import type { BaseQueryMeta } from '../../../queries/base-query-format'; +import { jsonReplacer } from '../../../util/json'; async function getDataflow(shell: RShell, remainingLine: string) { return await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { @@ -14,70 +23,138 @@ async function getDataflow(shell: RShell, remainingLine: string) { }).allRemainingSteps(); } -interface QueryPattern { - readonly description: string; - readonly pattern: string; - /* TODO: result */ - readonly call: (formatter: OutputFormatter, args: readonly string[]) => Promise; + +function printHelp(output: ReplOutput) { + output.stderr(`Format: ${italic(':query "" ', output.formatter)}`); + output.stdout('The query is an array of query objects to represent multiple queries. 
Each query object may have the following properties:'); + output.stdout(describeSchema(CallContextQuerySchema, output.formatter)); + output.stdout(`The example ${italic(':query "[{\\"type\\": \\"call-context\\", \\"callName\\": \\"mean\\" }]" mean(1:10)', output.formatter)} would return the call context of the mean function.`); + output.stdout('As a convenience, we interpret any (non-help) string not starting with \'[\' as a regex for the simple call-context query.'); + output.stdout(`Hence, ${italic(':query "mean" mean(1:10)', output.formatter)} is equivalent to the above example.`); } -function trimWithIndent(str: string, length: number, indent: number): string { - // split str into lines of length max length, then join them with the given indent - const lines = []; - const effictveLength = Math.max(Math.min(length , length - indent), 50); - for(let i = 0; i < str.length; i += effictveLength) { - lines.push(str.slice(i, i + effictveLength)); +async function processQueryArgs(line: string, shell: RShell, output: ReplOutput): Promise, processed: PipelineOutput }> { + const args = splitAtEscapeSensitive(line); + const query = args.shift(); + + if(!query) { + output.stderr(`No query provided, use ':query help' to get more information.`); + return; + } + if(query === 'help') { + printHelp(output); + return; + } + + let parsedQuery: Query[] = []; + if(query.startsWith('[')) { + parsedQuery = JSON.parse(query) as Query[]; + const validationResult = SupportedQueriesSchema.validate(parsedQuery); + if(validationResult.error) { + output.stderr(`Invalid query: ${validationResult.error.message}`); + printHelp(output); + return; + } + } else { + parsedQuery = [{ type: 'call-context', callName: query }]; } - return lines.join('\n' + ' '.repeat(indent)); + + const processed = await getDataflow(shell, args.join(' ')); + return { + query: executeQueries({ graph: processed.dataflow.graph, ast: processed.normalize }, parsedQuery), + processed + }; } -const AvailableQueries = { - 
'help': { - description: 'Get help on the available queries', - pattern: ':query help', - // eslint-disable-next-line @typescript-eslint/require-await - call: async f => { - console.log('Available queries:'); - for(const [query, { description, pattern }] of Object.entries(AvailableQueries)) { - console.log(`- [${bold(query, f)}] ${italic(pattern, f)}\n${' '.repeat(query.length + 5)}${trimWithIndent(description, 120, query.length + 5)}`); +/* + public asciiSummary() { + let result = ''; + for(const [layer1, layer2Map] of this.store) { + result += `${JSON.stringify(layer1)}\n`; + for(const [layer2, values] of layer2Map) { + result += ` ╰ ${JSON.stringify(layer2)}: ${JSON.stringify(values)}\n`; } } - }, - 'call': { - description: 'Call-Context Query (retrieve all calls matching your criteria). The criteria is to be a regex of the callName you are interested in. ', - pattern: ':query ', - // eslint-disable-next-line @typescript-eslint/require-await - call: async(f, args) => { - console.log('Call-Context Query:', args); - } - }, -} as const satisfies Record; + return result; + } + */ -async function processQueryArgs(line: string, shell: RShell, output: ReplOutput): Promise { - const args = splitAtEscapeSensitive(line); - const query = args.shift(); +function asciiCallContextSubHit(formatter: OutputFormatter, results: CallContextQuerySubKindResult[], processed: PipelineOutput): string { + const result: string[] = []; + for(const { id, calls = [], linkedIds = [] } of results) { + const node = processed.normalize.idMap.get(id); + if(node === undefined) { + result.push(` ${bold('UNKNOWN: ' + JSON.stringify({ calls, linkedIds }))}`); + continue; + } + let line = `${bold(node.lexeme ?? node.info.fullLexeme ?? 
'UNKKNOWN', formatter)} (L.${node.location?.[0]})`; + if(calls.length > 0) { + line += ` ${calls.length} calls`; + } + if(linkedIds.length > 0) { + line += ` ${linkedIds.length} links`; + } + result.push(line); + } + return result.join(', '); +} - if(!query) { - output.stderr('No query provided, use \':query help\' to get more information.'); - return; +function asciiCallContext(formatter: OutputFormatter, results: QueryResults<'call-context'>['call-context'], processed: PipelineOutput): string { + /* traverse over 'kinds' and within them 'subkinds' */ + const result: string[] = []; + for(const [kind, { subkinds }] of Object.entries(results['kinds'])) { + result.push(` ╰ ${bold(kind, formatter)}`); + for(const [subkind, values] of Object.entries(subkinds)) { + result.push(` ╰ ${bold(subkind, formatter)}: ${asciiCallContextSubHit(formatter, values, processed)}`); + } } + return result.join('\n'); +} - const queryPattern = AvailableQueries[query as keyof typeof AvailableQueries]; - if(!queryPattern) { - output.stderr(`Unknown query: ${query}, use ':query help' to get more information.`); - return; +function asciiSummary(formatter: OutputFormatter, totalInMs: number, results: QueryResults, processed: PipelineOutput): string { + const result: string[] = []; + + for(const [query, queryResults] of Object.entries(results)) { + if(query === '.meta') { + continue; + } + if(query === 'call-context') { + const out = queryResults as QueryResults<'call-context'>['call-context']; + result.push(`Query: ${bold(query, formatter)} (${out['.meta'].timing.toFixed(0)}ms)`); + result.push(asciiCallContext(formatter, out, processed)); + continue; + } + + result.push(`Query: ${bold(query, formatter)}`); + + let timing = -1; + // eslint-disable-next-line @typescript-eslint/no-unsafe-argument + for(const [key, value] of Object.entries(queryResults)) { + if(key === '.meta') { + timing = (value as BaseQueryMeta).timing; + continue; + } + result.push(` ╰ ${key}: ${JSON.stringify(value)}`); + } 
+ result.push(` - Took ${timing.toFixed(0)}ms`); } - return await queryPattern.call(output.formatter, args); + result.push(italic(`All queries together required ≈${results['.meta'].timing.toFixed(0)}ms (total ${totalInMs.toFixed(0)}ms)`, formatter)); + return result.join('\n'); } export const queryCommand: ReplCommand = { - description: `Query the given R code, start with '${fileProtocol}' to indicate a file. Use the 'help' query to get more information!`, - usageExample: ':query ', + description: `Query the given R code, start with '${fileProtocol}' to indicate a file. The query is to be a valid query in json format (use 'help' to get more information).`, + usageExample: ':query "" ', aliases: [], script: false, fn: async(output, shell, remainingLine) => { - await processQueryArgs(remainingLine, shell, output); + const totalStart = Date.now(); + const results = await processQueryArgs(remainingLine, shell, output); + const totalEnd = Date.now(); + if(results) { + output.stdout(asciiSummary(output.formatter, totalEnd - totalStart, results.query, results.processed)); + } } }; @@ -87,7 +164,9 @@ export const queryStarCommand: ReplCommand = { aliases: [ ], script: false, fn: async(output, shell, remainingLine) => { - /* TODO */ - await processQueryArgs(remainingLine, shell, output); + const results = await processQueryArgs(remainingLine, shell, output); + if(results) { + output.stdout(JSON.stringify(results.query, jsonReplacer)); + } } }; diff --git a/src/queries/call-context-query/call-context-query-executor.ts b/src/queries/call-context-query/call-context-query-executor.ts index 8d283f6d94..7d21281f4b 100644 --- a/src/queries/call-context-query/call-context-query-executor.ts +++ b/src/queries/call-context-query/call-context-query-executor.ts @@ -191,15 +191,13 @@ export function executeCallContextQueries({ graph, ast }: BasicQueryData, querie } } - initialIdCollector.add(query.kind, query.subkind, compactRecord({ id: nodeId, calls: targets, linkedIds })); + 
initialIdCollector.add(query.kind ?? '.', query.subkind ?? '.', compactRecord({ id: nodeId, calls: targets, linkedIds })); } } - console.log(initialIdCollector.asciiSummary()); - return { '.meta': { - timing: Date.now() - now + timing: Date.now() - now, }, kinds: makeReport(initialIdCollector) }; diff --git a/src/queries/call-context-query/call-context-query-format.ts b/src/queries/call-context-query/call-context-query-format.ts index 9d571d0e59..b33f19c7c0 100644 --- a/src/queries/call-context-query/call-context-query-format.ts +++ b/src/queries/call-context-query/call-context-query-format.ts @@ -1,7 +1,8 @@ import type { BaseQueryFormat, BaseQueryResult } from '../base-query-format'; import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id'; +import Joi from 'joi'; -export const enum CallTargets { +export enum CallTargets { /** call targets a function that is not defined locally (e.g., the call targets a library function) */ OnlyGlobal = 'global', /** call targets a function that is defined locally or globally, but must include a global function */ @@ -18,10 +19,10 @@ export interface DefaultCallContextQueryFormat readonly type: 'call-context'; /** Regex regarding the function name, please note that strings will be interpreted as regular expressions too! */ readonly callName: CallName; - /** kind may be a step or anything that you attach to the call, this can be used to group calls together (e.g., linking `ggplot` to `visualize`) */ - readonly kind: string; - /** subkinds are used to uniquely identify the respective call type when grouping the output (e.g., the normalized name, linking `ggplot` to `plot`) */ - readonly subkind: string; + /** kind may be a step or anything that you attach to the call, this can be used to group calls together (e.g., linking `ggplot` to `visualize`). 
Defaults to `.` */ + readonly kind?: string; + /** subkinds are used to uniquely identify the respective call type when grouping the output (e.g., the normalized name, linking `ggplot` to `plot`). Defaults to `.` */ + readonly subkind?: string; /** * Call targets the function may have. This defaults to {@link CallTargets#Any}. * Request this specifically to gain all call targets we can resolve. @@ -70,3 +71,16 @@ export interface CallContextQueryResult extends BaseQueryResult { } export type CallContextQuery = DefaultCallContextQueryFormat | SubCallContextQueryFormat; + +export const CallContextQuerySchema = Joi.object({ + type: Joi.string().valid('call-context').required().description('The type of the query.'), + callName: Joi.string().required().description('Regex regarding the function name!'), + kind: Joi.string().optional().description('The kind of the call, this can be used to group calls together (e.g., linking `plot` to `visualize`). Defaults to `.`'), + subkind: Joi.string().optional().description('The subkind of the call, this can be used to uniquely identify the respective call type when grouping the output (e.g., the normalized name, linking `ggplot` to `plot`). Defaults to `.`'), + callTargets: Joi.string().valid(...Object.values(CallTargets)).optional().description('Call targets the function may have. This defaults to `any`. Request this specifically to gain all call targets we can resolve.'), + linkTo: Joi.object({ + type: Joi.string().valid('link-to-last-call').required().description('The type of the linkTo sub-query.'), + callName: Joi.string().required().description('Regex regarding the function name of the last call. Similar to `callName`, strings are interpreted as a regular expression.') + }).optional().description('Links the current call to the last call of the given kind. 
This way, you can link a call like `points` to the latest graphics plot etc.') +}); + diff --git a/src/queries/query.ts b/src/queries/query.ts index b3fc9cbb20..4a49b73a2a 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -1,4 +1,5 @@ import type { CallContextQuery } from './call-context-query/call-context-query-format'; +import { CallContextQuerySchema } from './call-context-query/call-context-query-format'; import type { DataflowGraph } from '../dataflow/graph/graph'; import type { BaseQueryFormat, BaseQueryResult } from './base-query-format'; import { executeCallContextQueries } from './call-context-query/call-context-query-executor'; @@ -8,6 +9,7 @@ import { SupportedVirtualQueries } from './virtual-query/virtual-queries'; import type { Writable } from 'ts-essentials'; import type { VirtualCompoundConstraint } from './virtual-query/compound-query'; import type { NormalizedAst } from '../r-bridge/lang-4.x/ast/model/processing/decorate'; +import Joi from 'joi'; export type Query = CallContextQuery; @@ -30,6 +32,9 @@ export const SupportedQueries = { 'call-context': executeCallContextQueries } as const satisfies SupportedQueries; +export const SupportedQueriesSchema = Joi.array().items(Joi.alternatives( + CallContextQuerySchema +)); export type SupportedQueryTypes = keyof typeof SupportedQueries; export type QueryResult = ReturnType; diff --git a/src/queries/two-layer-collector.ts b/src/queries/two-layer-collector.ts index 1ad3608e50..e010a1da64 100644 --- a/src/queries/two-layer-collector.ts +++ b/src/queries/two-layer-collector.ts @@ -27,15 +27,4 @@ export class TwoLayerCollector { return this.store.get(layer1)?.keys() ?? 
[]; } - - public asciiSummary() { - let result = ''; - for(const [layer1, layer2Map] of this.store) { - result += `${JSON.stringify(layer1)}\n`; - for(const [layer2, values] of layer2Map) { - result += ` ╰ ${JSON.stringify(layer2)}: ${JSON.stringify(values)}\n`; - } - } - return result; - } } diff --git a/src/util/args.ts b/src/util/args.ts index f4db9b4cd1..e99bb8bb92 100644 --- a/src/util/args.ts +++ b/src/util/args.ts @@ -9,7 +9,7 @@ * * Given an input string like `a "b c" d`, with a space character as split, and escapeQuote set to true, * this splits the arguments similar to common shell interpreters (i.e., `a`, `b c`, and `d`). - * + * * When escapeQuote is set to false instead, we keep quotation marks in the result (i.e., `a`, `"b c"`, and `d`.). * * @param inputString - The string to split @@ -42,7 +42,7 @@ export function splitAtEscapeSensitive(inputString: string, escapeQuote = true, if(!escapeQuote) { current += c; } - } else if(c === '\\') { + } else if(c === '\\' && escapeQuote) { escaped = true; } else { current += c; diff --git a/src/util/schema.ts b/src/util/schema.ts new file mode 100644 index 0000000000..586e7e4682 --- /dev/null +++ b/src/util/schema.ts @@ -0,0 +1,66 @@ +import type Joi from 'joi'; +import type { OutputFormatter } from './ansi'; +import { italic , formatter , bold, ColorEffect, Colors } from './ansi'; + +interface SchemaLine { + level: number; + text: string; +} + +export function describeSchema(schema: Joi.Schema, f: OutputFormatter = formatter): string { + const description = schema.describe(); + const lines = genericDescription(1, f, f.format('.', { effect: ColorEffect.Foreground, color: Colors.White }), description); + const indent = ' '.repeat(4); + return lines.map(line => `${indent.repeat(line.level - 1)}${line.text}`).join('\n'); +} + +export function genericDescription(level: number, formatter: OutputFormatter, name: string, desc: Joi.Description): SchemaLine[] { + const lines = [...headerLine(level, formatter, name, 
desc.type ?? 'unknown', desc.flags)]; + if('allow' in desc) { + lines.push({ level: level + 1, text: `Allows only the values: ${(desc['allow'] as string[]).map(v => "'" + v + "'").join(', ')}` }); + } + switch(desc.type) { + case 'object': + lines.push(...describeObject(level, formatter, desc)); + break; + default: + /* specific support for others if needed */ + break; + } + return lines; +} + +function printFlags(flags: object | undefined): string { + if(!flags || Object.keys(flags).length === 0) { + return ''; + } + let flagText = ''; + if('presence' in flags) { + flagText += flags['presence'] === 'required' ? 'required' : 'optional'; + } + return '[' + flagText + ']'; +} + +export function headerLine(level: number, formatter: OutputFormatter, name: string, type: string, flags: object | undefined): SchemaLine[] { + const text = `- ${bold(name, formatter)} ${formatter.format(type, { effect: ColorEffect.Foreground, color: Colors.White })} ${printFlags(flags)}`; + const baseLine = { level, text }; + if(flags && 'description' in flags) { + return [baseLine, { level: level + 1, text: italic(flags['description'] as string, formatter) }]; + } + return [baseLine]; +} + +export function describeObject(level: number, formatter: OutputFormatter, desc: Joi.Description): SchemaLine[] { + const lines: SchemaLine[] = []; + + if(!('keys' in desc)) { + return lines; + } + for(const key in desc.keys) { + // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access + const keySchema = desc.keys[key] as Joi.Description; + lines.push(...genericDescription(level + 1, formatter, key, keySchema)); + } + + return lines; +} From 69e4a3a8a31d9672e587ee7f947b163768f2e299 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sun, 22 Sep 2024 13:18:44 +0200 Subject: [PATCH 21/41] refactor: improve sophistication of query schemas --- src/cli/repl/commands/repl-query.ts | 10 +++--- src/cli/repl/server/connection.ts | 18 +++++----- .../{analysis.ts => message-analysis.ts} | 0 
.../messages/{error.ts => message-error.ts} | 0 .../messages/{hello.ts => message-hello.ts} | 0 .../{lineage.ts => message-lineage.ts} | 0 src/cli/repl/server/messages/message-query.ts | 29 +++++++++++++++ .../messages/{repl.ts => message-repl.ts} | 0 .../messages/{slice.ts => message-slice.ts} | 0 src/cli/repl/server/messages/messages.ts | 12 +++---- src/cli/repl/server/server.ts | 4 +-- src/cli/repl/server/validate.ts | 2 +- .../call-context-query-format.ts | 13 ------- src/queries/query-schema.ts | 36 +++++++++++++++++++ src/queries/query.ts | 13 ++++--- src/util/schema.ts | 15 ++++++-- test/functionality/cli/server.spec.ts | 6 ++-- 17 files changed, 110 insertions(+), 48 deletions(-) rename src/cli/repl/server/messages/{analysis.ts => message-analysis.ts} (100%) rename src/cli/repl/server/messages/{error.ts => message-error.ts} (100%) rename src/cli/repl/server/messages/{hello.ts => message-hello.ts} (100%) rename src/cli/repl/server/messages/{lineage.ts => message-lineage.ts} (100%) create mode 100644 src/cli/repl/server/messages/message-query.ts rename src/cli/repl/server/messages/{repl.ts => message-repl.ts} (100%) rename src/cli/repl/server/messages/{slice.ts => message-slice.ts} (100%) create mode 100644 src/queries/query-schema.ts diff --git a/src/cli/repl/commands/repl-query.ts b/src/cli/repl/commands/repl-query.ts index 2c96d5a4eb..b9ba40f48a 100644 --- a/src/cli/repl/commands/repl-query.ts +++ b/src/cli/repl/commands/repl-query.ts @@ -8,13 +8,13 @@ import type { OutputFormatter } from '../../../util/ansi'; import { bold, italic } from '../../../util/ansi'; import type { CallContextQuerySubKindResult } from '../../../queries/call-context-query/call-context-query-format'; -import { CallContextQuerySchema } from '../../../queries/call-context-query/call-context-query-format'; import { describeSchema } from '../../../util/schema'; import type { Query, QueryResults, SupportedQueryTypes } from '../../../queries/query'; -import { executeQueries, 
SupportedQueriesSchema } from '../../../queries/query'; +import { executeQueries } from '../../../queries/query'; import type { PipelineOutput } from '../../../core/steps/pipeline/pipeline'; import type { BaseQueryMeta } from '../../../queries/base-query-format'; import { jsonReplacer } from '../../../util/json'; +import { AnyQuerySchema, QueriesSchema } from '../../../queries/query-schema'; async function getDataflow(shell: RShell, remainingLine: string) { return await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { @@ -27,8 +27,8 @@ async function getDataflow(shell: RShell, remainingLine: string) { function printHelp(output: ReplOutput) { output.stderr(`Format: ${italic(':query "" ', output.formatter)}`); output.stdout('The query is an array of query objects to represent multiple queries. Each query object may have the following properties:'); - output.stdout(describeSchema(CallContextQuerySchema, output.formatter)); - output.stdout(`The example ${italic(':query "[{\\"type\\": \\"call-context\\", \\"callName\\": \\"mean\\" }]" mean(1:10)', output.formatter)} would return the call context of the mean function.`); + output.stdout(describeSchema(AnyQuerySchema, output.formatter)); + output.stdout(`\n\nThe example ${italic(':query "[{\\"type\\": \\"call-context\\", \\"callName\\": \\"mean\\" }]" mean(1:10)', output.formatter)} would return the call context of the mean function.`); output.stdout('As a convenience, we interpret any (non-help) string not starting with \'[\' as a regex for the simple call-context query.'); output.stdout(`Hence, ${italic(':query "mean" mean(1:10)', output.formatter)} is equivalent to the above example.`); } @@ -49,7 +49,7 @@ async function processQueryArgs(line: string, shell: RShell, output: ReplOutput) let parsedQuery: Query[] = []; if(query.startsWith('[')) { parsedQuery = JSON.parse(query) as Query[]; - const validationResult = SupportedQueriesSchema.validate(parsedQuery); + const validationResult = 
QueriesSchema.validate(parsedQuery); if(validationResult.error) { output.stderr(`Invalid query: ${validationResult.error.message}`); printHelp(output); diff --git a/src/cli/repl/server/connection.ts b/src/cli/repl/server/connection.ts index 9b41a073d0..79d43cdf57 100644 --- a/src/cli/repl/server/connection.ts +++ b/src/cli/repl/server/connection.ts @@ -1,15 +1,15 @@ import { sendMessage } from './send'; import { answerForValidationError, validateBaseMessageFormat, validateMessage } from './validate'; -import type { FileAnalysisRequestMessage, FileAnalysisResponseMessageNQuads } from './messages/analysis'; -import { requestAnalysisMessage } from './messages/analysis'; -import type { SliceRequestMessage, SliceResponseMessage } from './messages/slice'; -import { requestSliceMessage } from './messages/slice'; -import type { FlowrErrorMessage } from './messages/error'; +import type { FileAnalysisRequestMessage, FileAnalysisResponseMessageNQuads } from './messages/message-analysis'; +import { requestAnalysisMessage } from './messages/message-analysis'; +import type { SliceRequestMessage, SliceResponseMessage } from './messages/message-slice'; +import { requestSliceMessage } from './messages/message-slice'; +import type { FlowrErrorMessage } from './messages/message-error'; import type { Socket } from './net'; import { serverLog } from './server'; import type { ILogObj, Logger } from 'tslog'; -import type { ExecuteEndMessage, ExecuteIntermediateResponseMessage, ExecuteRequestMessage } from './messages/repl'; -import { requestExecuteReplExpressionMessage } from './messages/repl'; +import type { ExecuteEndMessage, ExecuteIntermediateResponseMessage, ExecuteRequestMessage } from './messages/message-repl'; +import { requestExecuteReplExpressionMessage } from './messages/message-repl'; import { replProcessAnswer } from '../core'; import { PipelineExecutor } from '../../../core/pipeline-executor'; import { LogLevel } from '../../../util/log'; @@ -34,8 +34,8 @@ import * as tmp 
from 'tmp'; import fs from 'fs'; import type { RParseRequests } from '../../../r-bridge/retriever'; import { makeMagicCommentHandler } from '../../../reconstruct/auto-select/magic-comments'; -import type { LineageRequestMessage, LineageResponseMessage } from './messages/lineage'; -import { requestLineageMessage } from './messages/lineage'; +import type { LineageRequestMessage, LineageResponseMessage } from './messages/message-lineage'; +import { requestLineageMessage } from './messages/message-lineage'; import { getLineage } from '../commands/repl-lineage'; import { guard } from '../../../util/assert'; import { doNotAutoSelect } from '../../../reconstruct/auto-select/auto-select-defaults'; diff --git a/src/cli/repl/server/messages/analysis.ts b/src/cli/repl/server/messages/message-analysis.ts similarity index 100% rename from src/cli/repl/server/messages/analysis.ts rename to src/cli/repl/server/messages/message-analysis.ts diff --git a/src/cli/repl/server/messages/error.ts b/src/cli/repl/server/messages/message-error.ts similarity index 100% rename from src/cli/repl/server/messages/error.ts rename to src/cli/repl/server/messages/message-error.ts diff --git a/src/cli/repl/server/messages/hello.ts b/src/cli/repl/server/messages/message-hello.ts similarity index 100% rename from src/cli/repl/server/messages/hello.ts rename to src/cli/repl/server/messages/message-hello.ts diff --git a/src/cli/repl/server/messages/lineage.ts b/src/cli/repl/server/messages/message-lineage.ts similarity index 100% rename from src/cli/repl/server/messages/lineage.ts rename to src/cli/repl/server/messages/message-lineage.ts diff --git a/src/cli/repl/server/messages/message-query.ts b/src/cli/repl/server/messages/message-query.ts new file mode 100644 index 0000000000..1a8988bbee --- /dev/null +++ b/src/cli/repl/server/messages/message-query.ts @@ -0,0 +1,29 @@ +import type { IdMessageBase, MessageDefinition } from './messages'; +import type { NodeId } from 
'../../../../r-bridge/lang-4.x/ast/model/processing/node-id'; +import Joi from 'joi'; +import { QueriesSchema } from '../../../../queries/query-schema'; +import type { Query } from '../../../../queries/query'; + +export interface QueryRequestMessage extends IdMessageBase { + type: 'request-query', + /** The {@link FileAnalysisRequestMessage#filetoken} of the file/data */ + filetoken: string, + /** The query to run on the file analysis information */ + query: Query +} + +export const requestQueryMessage: MessageDefinition = { + type: 'request-query', + schema: Joi.object({ + type: Joi.string().valid('request-query').required(), + id: Joi.string().optional().description('If you give the id, the response will be sent to the client with the same id.'), + filetoken: Joi.string().required().description('The filetoken of the file/data retrieved from the analysis request.'), + query: QueriesSchema.required().description('The query to run on the file analysis information.') + }) +}; + +export interface LineageResponseMessage extends IdMessageBase { + type: 'response-lineage', + /** The lineage of the given criterion. With this being the representation of a set, there is no guarantee about order. 
*/ + lineage: NodeId[] +} diff --git a/src/cli/repl/server/messages/repl.ts b/src/cli/repl/server/messages/message-repl.ts similarity index 100% rename from src/cli/repl/server/messages/repl.ts rename to src/cli/repl/server/messages/message-repl.ts diff --git a/src/cli/repl/server/messages/slice.ts b/src/cli/repl/server/messages/message-slice.ts similarity index 100% rename from src/cli/repl/server/messages/slice.ts rename to src/cli/repl/server/messages/message-slice.ts diff --git a/src/cli/repl/server/messages/messages.ts b/src/cli/repl/server/messages/messages.ts index 7d0b243c51..a7d6d88363 100644 --- a/src/cli/repl/server/messages/messages.ts +++ b/src/cli/repl/server/messages/messages.ts @@ -4,12 +4,12 @@ * @module */ import * as Joi from 'joi'; -import type { FlowrHelloResponseMessage } from './hello'; -import type { FileAnalysisRequestMessage, FileAnalysisResponseMessageJson } from './analysis'; -import type { ExecuteEndMessage, ExecuteIntermediateResponseMessage, ExecuteRequestMessage } from './repl'; -import type { SliceRequestMessage, SliceResponseMessage } from './slice'; -import type { FlowrErrorMessage } from './error'; -import type { LineageRequestMessage, LineageResponseMessage } from './lineage'; +import type { FlowrHelloResponseMessage } from './message-hello'; +import type { FileAnalysisRequestMessage, FileAnalysisResponseMessageJson } from './message-analysis'; +import type { ExecuteEndMessage, ExecuteIntermediateResponseMessage, ExecuteRequestMessage } from './message-repl'; +import type { SliceRequestMessage, SliceResponseMessage } from './message-slice'; +import type { FlowrErrorMessage } from './message-error'; +import type { LineageRequestMessage, LineageResponseMessage } from './message-lineage'; /** * If you send a message it must *not* contain a newline but the message must be terminated by a newline. 
diff --git a/src/cli/repl/server/server.ts b/src/cli/repl/server/server.ts index d7a193ec0c..2376786f54 100644 --- a/src/cli/repl/server/server.ts +++ b/src/cli/repl/server/server.ts @@ -2,8 +2,8 @@ import type { VersionInformation } from '../commands/repl-version'; import { retrieveVersionInformation } from '../commands/repl-version'; import { FlowRServerConnection } from './connection'; import { getUnnamedSocketName, sendMessage } from './send'; -import type { FlowrHelloResponseMessage } from './messages/hello'; -import type { FlowrErrorMessage } from './messages/error'; +import type { FlowrHelloResponseMessage } from './messages/message-hello'; +import type { FlowrErrorMessage } from './messages/message-error'; import type { Server, Socket } from './net'; import { NetServer } from './net'; import { FlowrLogger } from '../../../util/log'; diff --git a/src/cli/repl/server/validate.ts b/src/cli/repl/server/validate.ts index 154f214436..7959b4ac60 100644 --- a/src/cli/repl/server/validate.ts +++ b/src/cli/repl/server/validate.ts @@ -2,7 +2,7 @@ import type * as Joi from 'joi'; import { sendMessage } from './send'; import type { FlowrMessage, IdMessageBase, MessageDefinition } from './messages/messages'; import { baseMessage } from './messages/messages'; -import type { FlowrErrorMessage } from './messages/error'; +import type { FlowrErrorMessage } from './messages/message-error'; import type { Socket } from './net'; export interface ValidationErrorResult { type: 'error', reason: Joi.ValidationError | Error } diff --git a/src/queries/call-context-query/call-context-query-format.ts b/src/queries/call-context-query/call-context-query-format.ts index b33f19c7c0..74d4d538c8 100644 --- a/src/queries/call-context-query/call-context-query-format.ts +++ b/src/queries/call-context-query/call-context-query-format.ts @@ -1,6 +1,5 @@ import type { BaseQueryFormat, BaseQueryResult } from '../base-query-format'; import type { NodeId } from 
'../../r-bridge/lang-4.x/ast/model/processing/node-id'; -import Joi from 'joi'; export enum CallTargets { /** call targets a function that is not defined locally (e.g., the call targets a library function) */ @@ -72,15 +71,3 @@ export interface CallContextQueryResult extends BaseQueryResult { export type CallContextQuery = DefaultCallContextQueryFormat | SubCallContextQueryFormat; -export const CallContextQuerySchema = Joi.object({ - type: Joi.string().valid('call-context').required().description('The type of the query.'), - callName: Joi.string().required().description('Regex regarding the function name!'), - kind: Joi.string().optional().description('The kind of the call, this can be used to group calls together (e.g., linking `plot` to `visualize`). Defaults to `.`'), - subkind: Joi.string().optional().description('The subkind of the call, this can be used to uniquely identify the respective call type when grouping the output (e.g., the normalized name, linking `ggplot` to `plot`). Defaults to `.`'), - callTargets: Joi.string().valid(...Object.values(CallTargets)).optional().description('Call targets the function may have. This defaults to `any`. Request this specifically to gain all call targets we can resolve.'), - linkTo: Joi.object({ - type: Joi.string().valid('link-to-last-call').required().description('The type of the linkTo sub-query.'), - callName: Joi.string().required().description('Regex regarding the function name of the last call. Similar to `callName`, strings are interpreted as a regular expression.') - }).optional().description('Links the current call to the last call of the given kind. 
This way, you can link a call like `points` to the latest graphics plot etc.') -}); - diff --git a/src/queries/query-schema.ts b/src/queries/query-schema.ts new file mode 100644 index 0000000000..f67b7d692c --- /dev/null +++ b/src/queries/query-schema.ts @@ -0,0 +1,36 @@ +import Joi from 'joi'; +import { CallTargets } from './call-context-query/call-context-query-format'; + +export const CallContextQuerySchema = Joi.object({ + type: Joi.string().valid('call-context').required().description('The type of the query.'), + callName: Joi.string().required().description('Regex regarding the function name!'), + kind: Joi.string().optional().description('The kind of the call, this can be used to group calls together (e.g., linking `plot` to `visualize`). Defaults to `.`'), + subkind: Joi.string().optional().description('The subkind of the call, this can be used to uniquely identify the respective call type when grouping the output (e.g., the normalized name, linking `ggplot` to `plot`). Defaults to `.`'), + callTargets: Joi.string().valid(...Object.values(CallTargets)).optional().description('Call targets the function may have. This defaults to `any`. Request this specifically to gain all call targets we can resolve.'), + linkTo: Joi.object({ + type: Joi.string().valid('link-to-last-call').required().description('The type of the linkTo sub-query.'), + callName: Joi.string().required().description('Regex regarding the function name of the last call. Similar to `callName`, strings are interpreted as a regular expression.') + }).optional().description('Links the current call to the last call of the given kind. 
This way, you can link a call like `points` to the latest graphics plot etc.') +}).description('Call context query used to find calls in the dataflow graph'); + +export const SupportedQueriesSchema = Joi.alternatives( + CallContextQuerySchema +).description('Supported queries'); + +export const CompoundQuerySchema = Joi.object({ + type: Joi.string().valid('compound').required().description('The type of the query.'), + query: Joi.string().required().description('The query to run on the file analysis information.'), + commonArguments: Joi.object().required().description('Common arguments for all queries.'), + arguments: Joi.array().items(SupportedQueriesSchema).required().description('Arguments for each query.') +}).description('Compound query used to combine queries of the same type'); + +export const VirtualQuerySchema = Joi.alternatives( + CompoundQuerySchema +).description('Virtual queries (used for structure)'); + +export const AnyQuerySchema = Joi.alternatives( + SupportedQueriesSchema, + VirtualQuerySchema +).description('Any query'); + +export const QueriesSchema = Joi.array().items(AnyQuerySchema); diff --git a/src/queries/query.ts b/src/queries/query.ts index 4a49b73a2a..357ccc8ea2 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -1,5 +1,4 @@ import type { CallContextQuery } from './call-context-query/call-context-query-format'; -import { CallContextQuerySchema } from './call-context-query/call-context-query-format'; import type { DataflowGraph } from '../dataflow/graph/graph'; import type { BaseQueryFormat, BaseQueryResult } from './base-query-format'; import { executeCallContextQueries } from './call-context-query/call-context-query-executor'; @@ -9,7 +8,6 @@ import { SupportedVirtualQueries } from './virtual-query/virtual-queries'; import type { Writable } from 'ts-essentials'; import type { VirtualCompoundConstraint } from './virtual-query/compound-query'; import type { NormalizedAst } from 
'../r-bridge/lang-4.x/ast/model/processing/decorate'; -import Joi from 'joi'; export type Query = CallContextQuery; @@ -32,10 +30,6 @@ export const SupportedQueries = { 'call-context': executeCallContextQueries } as const satisfies SupportedQueries; -export const SupportedQueriesSchema = Joi.array().items(Joi.alternatives( - CallContextQuerySchema -)); - export type SupportedQueryTypes = keyof typeof SupportedQueries; export type QueryResult = ReturnType; @@ -92,10 +86,15 @@ type OmitFromValues = { export type QueryResultsWithoutMeta = OmitFromValues, '.meta'>, '.meta'>; +export type Queries< + Base extends SupportedQueryTypes, + VirtualArguments extends VirtualCompoundConstraint = VirtualCompoundConstraint +> = readonly (QueryArgumentsWithType | VirtualQueryArgumentsWithType)[]; + export function executeQueries< Base extends SupportedQueryTypes, VirtualArguments extends VirtualCompoundConstraint = VirtualCompoundConstraint ->(data: BasicQueryData, queries: readonly (QueryArgumentsWithType | VirtualQueryArgumentsWithType)[]): QueryResults { +>(data: BasicQueryData, queries: Queries): QueryResults { const now = Date.now(); const grouped = groupQueriesByType(queries); const results = {} as Writable>; diff --git a/src/util/schema.ts b/src/util/schema.ts index 586e7e4682..0dab3d433f 100644 --- a/src/util/schema.ts +++ b/src/util/schema.ts @@ -14,7 +14,10 @@ export function describeSchema(schema: Joi.Schema, f: OutputFormatter = formatte return lines.map(line => `${indent.repeat(line.level - 1)}${line.text}`).join('\n'); } -export function genericDescription(level: number, formatter: OutputFormatter, name: string, desc: Joi.Description): SchemaLine[] { +export function genericDescription(level: number, formatter: OutputFormatter, name: string, desc: Joi.Description | undefined): SchemaLine[] { + if(!desc) { + return []; + } const lines = [...headerLine(level, formatter, name, desc.type ?? 
'unknown', desc.flags)]; if('allow' in desc) { lines.push({ level: level + 1, text: `Allows only the values: ${(desc['allow'] as string[]).map(v => "'" + v + "'").join(', ')}` }); @@ -23,6 +26,14 @@ export function genericDescription(level: number, formatter: OutputFormatter, na case 'object': lines.push(...describeObject(level, formatter, desc)); break; + case 'alternatives': + if('matches' in desc) { + lines.push( + ...(desc['matches'] as { schema: Joi.Description }[]) + .flatMap(({ schema }) => genericDescription(level + 1, formatter, '.', schema)) + ); + } + break; default: /* specific support for others if needed */ break; @@ -38,7 +49,7 @@ function printFlags(flags: object | undefined): string { if('presence' in flags) { flagText += flags['presence'] === 'required' ? 'required' : 'optional'; } - return '[' + flagText + ']'; + return flagText.trim().length > 0 ? '[' + flagText + ']' : ''; } export function headerLine(level: number, formatter: OutputFormatter, name: string, type: string, flags: object | undefined): SchemaLine[] { diff --git a/test/functionality/cli/server.spec.ts b/test/functionality/cli/server.spec.ts index 848f5fcd9e..06e69b596e 100644 --- a/test/functionality/cli/server.spec.ts +++ b/test/functionality/cli/server.spec.ts @@ -1,17 +1,17 @@ import { assert } from 'chai'; import { withShell } from '../_helper/shell'; import { fakeSend, withSocket } from '../_helper/net'; -import type { FlowrHelloResponseMessage } from '../../../src/cli/repl/server/messages/hello'; +import type { FlowrHelloResponseMessage } from '../../../src/cli/repl/server/messages/message-hello'; import { retrieveVersionInformation } from '../../../src/cli/repl/commands/repl-version'; import type { ExecuteEndMessage, ExecuteIntermediateResponseMessage, ExecuteRequestMessage -} from '../../../src/cli/repl/server/messages/repl'; +} from '../../../src/cli/repl/server/messages/message-repl'; import type { FileAnalysisRequestMessage, FileAnalysisResponseMessageJson -} from 
'../../../src/cli/repl/server/messages/analysis'; +} from '../../../src/cli/repl/server/messages/message-analysis'; import { PipelineExecutor } from '../../../src/core/pipeline-executor'; import { jsonReplacer } from '../../../src/util/json'; import { extractCFG } from '../../../src/util/cfg/cfg'; From c88c5ffa39578ae5bcf1cc6f585757b60e60b202 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sun, 22 Sep 2024 13:35:04 +0200 Subject: [PATCH 22/41] feat(query): server message --- src/cli/repl/server/connection.ts | 39 +++++++++++++++++++ src/cli/repl/server/messages/message-query.ts | 15 ++++--- src/cli/repl/server/messages/messages.ts | 3 ++ test/functionality/_helper/label.ts | 4 +- test/functionality/cli/server.spec.ts | 33 +++++++++++++++- 5 files changed, 82 insertions(+), 12 deletions(-) diff --git a/src/cli/repl/server/connection.ts b/src/cli/repl/server/connection.ts index 79d43cdf57..aaadb23e08 100644 --- a/src/cli/repl/server/connection.ts +++ b/src/cli/repl/server/connection.ts @@ -39,6 +39,9 @@ import { requestLineageMessage } from './messages/message-lineage'; import { getLineage } from '../commands/repl-lineage'; import { guard } from '../../../util/assert'; import { doNotAutoSelect } from '../../../reconstruct/auto-select/auto-select-defaults'; +import type { QueryRequestMessage, QueryResponseMessage } from './messages/message-query'; +import { requestQueryMessage } from './messages/message-query'; +import { executeQueries } from '../../../queries/query'; /** * Each connection handles a single client, answering to its requests. 
@@ -99,6 +102,9 @@ export class FlowRServerConnection { case 'request-lineage': this.handleLineageRequest(request.message as LineageRequestMessage); break; + case 'request-query': + this.handleQueryRequest(request.message as QueryRequestMessage); + break; default: sendMessage(this.socket, { id: request.message.id, @@ -316,6 +322,39 @@ export class FlowRServerConnection { lineage: [...lineageIds] }); } + + private handleQueryRequest(base: QueryRequestMessage) { + const requestResult = validateMessage(base, requestQueryMessage); + + if(requestResult.type === 'error') { + answerForValidationError(this.socket, requestResult, base.id); + return; + } + + const request = requestResult.message; + this.logger.info(`[${this.name}] Received query request for query ${JSON.stringify(request.query)}`); + + const fileInformation = this.fileMap.get(request.filetoken); + if(!fileInformation) { + sendMessage(this.socket, { + id: request.id, + type: 'error', + fatal: false, + reason: `The file token ${request.filetoken} has never been analyzed.` + }); + return; + } + + const { dataflow: dfg, normalize: ast } = fileInformation.pipeline.getResults(true); + guard(dfg !== undefined, `Dataflow graph must be present (request: ${request.filetoken})`); + guard(ast !== undefined, `AST must be present (request: ${request.filetoken})`); + const results = executeQueries({ graph: dfg.graph, ast }, request.query); + sendMessage(this.socket, { + type: 'response-query', + id: request.id, + results + }); + } } export function sanitizeAnalysisResults(results: Partial>): DeepPartial> { diff --git a/src/cli/repl/server/messages/message-query.ts b/src/cli/repl/server/messages/message-query.ts index 1a8988bbee..1f90ef977e 100644 --- a/src/cli/repl/server/messages/message-query.ts +++ b/src/cli/repl/server/messages/message-query.ts @@ -1,15 +1,14 @@ import type { IdMessageBase, MessageDefinition } from './messages'; -import type { NodeId } from '../../../../r-bridge/lang-4.x/ast/model/processing/node-id'; 
import Joi from 'joi'; import { QueriesSchema } from '../../../../queries/query-schema'; -import type { Query } from '../../../../queries/query'; +import type { Queries, QueryResults, SupportedQueryTypes } from '../../../../queries/query'; export interface QueryRequestMessage extends IdMessageBase { type: 'request-query', /** The {@link FileAnalysisRequestMessage#filetoken} of the file/data */ filetoken: string, /** The query to run on the file analysis information */ - query: Query + query: Queries } export const requestQueryMessage: MessageDefinition = { @@ -19,11 +18,11 @@ export const requestQueryMessage: MessageDefinition = { id: Joi.string().optional().description('If you give the id, the response will be sent to the client with the same id.'), filetoken: Joi.string().required().description('The filetoken of the file/data retrieved from the analysis request.'), query: QueriesSchema.required().description('The query to run on the file analysis information.') - }) + }).description('Request a query to be run on the file analysis information.') }; -export interface LineageResponseMessage extends IdMessageBase { - type: 'response-lineage', - /** The lineage of the given criterion. With this being the representation of a set, there is no guarantee about order. 
*/ - lineage: NodeId[] +export interface QueryResponseMessage extends IdMessageBase { + type: 'response-query', + /** Contains an entry for each (non-virtual) query type requested */ + results: QueryResults } diff --git a/src/cli/repl/server/messages/messages.ts b/src/cli/repl/server/messages/messages.ts index a7d6d88363..44625514a9 100644 --- a/src/cli/repl/server/messages/messages.ts +++ b/src/cli/repl/server/messages/messages.ts @@ -10,6 +10,7 @@ import type { ExecuteEndMessage, ExecuteIntermediateResponseMessage, ExecuteRequ import type { SliceRequestMessage, SliceResponseMessage } from './message-slice'; import type { FlowrErrorMessage } from './message-error'; import type { LineageRequestMessage, LineageResponseMessage } from './message-lineage'; +import type { QueryRequestMessage, QueryResponseMessage } from './message-query'; /** * If you send a message it must *not* contain a newline but the message must be terminated by a newline. @@ -54,4 +55,6 @@ export type FlowrMessage = FlowrHelloResponseMessage | SliceResponseMessage | LineageRequestMessage | LineageResponseMessage + | QueryRequestMessage + | QueryResponseMessage | FlowrErrorMessage diff --git a/test/functionality/_helper/label.ts b/test/functionality/_helper/label.ts index 743c6ffa0c..6bf7e661b5 100644 --- a/test/functionality/_helper/label.ts +++ b/test/functionality/_helper/label.ts @@ -174,7 +174,7 @@ function printLabelSummary(): void { printCapability(capability, testNames); } - console.log('-- Tests-By-Context ' + '-'.repeat(80)); + console.log('-- Tests-By-Context (Systematic Only)' + '-'.repeat(80)); const contextMap = new DefaultMap(() => 0); const blockedIds = new Set(); for(const testNames of TheGlobalLabelMap.values()) { @@ -188,7 +188,7 @@ function printLabelSummary(): void { } } } - for(const [context, count] of contextMap.entries()) { + for(const [context, count] of [...contextMap.entries()].sort((a, b) => a[0].localeCompare(b[0]))){ console.log(`- ${context}: ${count}`); } } diff 
--git a/test/functionality/cli/server.spec.ts b/test/functionality/cli/server.spec.ts index 06e69b596e..f9c14e8df3 100644 --- a/test/functionality/cli/server.spec.ts +++ b/test/functionality/cli/server.spec.ts @@ -18,6 +18,7 @@ import { extractCFG } from '../../../src/util/cfg/cfg'; import { DEFAULT_DATAFLOW_PIPELINE } from '../../../src/core/steps/pipeline/default-pipelines'; import { requestFromInput } from '../../../src/r-bridge/retriever'; import { sanitizeAnalysisResults } from '../../../src/cli/repl/server/connection'; +import type { QueryRequestMessage, QueryResponseMessage } from '../../../src/cli/repl/server/messages/message-query'; describe('flowr', () => { describe('Server', withShell(shell => { @@ -65,7 +66,7 @@ describe('flowr', () => { })); - it('Allow to analyze a simple expression', withSocket(shell, async socket => { + it('Analyze a simple expression', withSocket(shell, async socket => { fakeSend(socket, { type: 'request-file-analysis', id: '42', @@ -97,7 +98,7 @@ describe('flowr', () => { assert.strictEqual(got, expected, 'Expected the second message to have the same results as the slicer'); })); - it('Analyze with the CFG', withSocket(shell, async socket => { + it('Analyze the CFG', withSocket(shell, async socket => { fakeSend(socket, { type: 'request-file-analysis', id: '42', @@ -116,5 +117,33 @@ describe('flowr', () => { const expectedCfg = extractCFG(response.results.normalize); assert.equal(JSON.stringify(gotCfg?.graph, jsonReplacer), JSON.stringify(expectedCfg.graph, jsonReplacer), 'Expected the cfg to be the same as the one extracted from the results'); })); + + it('Process a Query', withSocket(shell, async socket => { + fakeSend(socket, { + type: 'request-file-analysis', + id: '42', + filetoken: 'super-token', + filename: 'x', + content: 'print(17)' + }); + await socket.waitForMessage('response-file-analysis'); + + /* request a query */ + fakeSend(socket, { + type: 'request-query', + id: '21', + filetoken: 'super-token', + query: [{ type: 
'call-context', callName: 'print' }] + }); + + await socket.waitForMessage('response-query'); + const messages = socket.getMessages(['hello', 'response-file-analysis', 'response-query']); + const response = messages[2] as QueryResponseMessage; + + console.log(response.results); + assert.exists(response.results['call-context'], 'Expected the query to return at least one result'); + assert.exists(response.results['.meta'], 'Expected the query to return at least one result'); + assert.equal(response.results['call-context']['kinds']['.']['subkinds']['.'].length, 1, 'We should find one call to print!'); + })); })); }); From 6e8d0b617ffc57aa109f659f434e41be5d64c26c Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sun, 22 Sep 2024 14:50:06 +0200 Subject: [PATCH 23/41] wip(query): working on the doc --- .github/workflows/broken-links-and-wiki.yaml | 3 +- package.json | 1 + src/cli/repl/commands/repl-query.ts | 19 +- src/documentation/README.md | 2 +- src/documentation/doc-util/doc-dfg.ts | 95 +++++ src/documentation/doc-util/doc-files.ts | 16 + src/documentation/doc-util/doc-ms.ts | 5 + src/documentation/doc-util/doc-query.ts | 72 ++++ .../print-dataflow-graph-wiki.ts | 102 +---- src/documentation/print-query-wiki.ts | 117 ++++++ .../call-context-query-executor.ts | 8 +- src/util/ansi.ts | 23 ++ .../query/call-context-query-tests.ts | 1 + wiki/Query API.md | 354 ++++++++++++++++++ wiki/_Sidebar.md | 1 + 15 files changed, 703 insertions(+), 116 deletions(-) create mode 100644 src/documentation/doc-util/doc-dfg.ts create mode 100644 src/documentation/doc-util/doc-files.ts create mode 100644 src/documentation/doc-util/doc-ms.ts create mode 100644 src/documentation/doc-util/doc-query.ts create mode 100644 src/documentation/print-query-wiki.ts create mode 100644 wiki/Query API.md diff --git a/.github/workflows/broken-links-and-wiki.yaml b/.github/workflows/broken-links-and-wiki.yaml index 59a3c457dd..4fed28cc37 100644 --- a/.github/workflows/broken-links-and-wiki.yaml +++ 
b/.github/workflows/broken-links-and-wiki.yaml @@ -75,7 +75,8 @@ jobs: } update_wiki_page "Capabilities" capabilities-markdown - update_wiki_page "Dataflow Graph" df-graph-wiki-markdown + update_wiki_page "Dataflow Graph" df-graph-wiki-markdown + update_wiki_page "Query API" query-wiki-markdown if [ $CHANGED_ANY == "true" ]; then git config --local user.email "action@github.com" diff --git a/package.json b/package.json index c462703d15..25257e5337 100644 --- a/package.json +++ b/package.json @@ -26,6 +26,7 @@ "export-quads": "ts-node src/cli/export-quads-app.ts", "capabilities-markdown": "ts-node src/documentation/print-capabilities-markdown.ts", "df-graph-wiki-markdown": "ts-node src/documentation/print-dataflow-graph-wiki.ts", + "query-wiki-markdown": "ts-node src/documentation/print-query-wiki.ts", "build": "tsc --project .", "build:bundle-flowr": "npm run build && esbuild --bundle dist/src/cli/flowr.js --platform=node --bundle --minify --target=node18 --outfile=dist/src/cli/flowr.min.js", "lint-local": "npx eslint --version && npx eslint src/ test/ --rule \"no-warning-comments: off\"", diff --git a/src/cli/repl/commands/repl-query.ts b/src/cli/repl/commands/repl-query.ts index b9ba40f48a..a5f6179ea4 100644 --- a/src/cli/repl/commands/repl-query.ts +++ b/src/cli/repl/commands/repl-query.ts @@ -66,19 +66,6 @@ async function processQueryArgs(line: string, shell: RShell, output: ReplOutput) }; } -/* - public asciiSummary() { - let result = ''; - for(const [layer1, layer2Map] of this.store) { - result += `${JSON.stringify(layer1)}\n`; - for(const [layer2, values] of layer2Map) { - result += ` ╰ ${JSON.stringify(layer2)}: ${JSON.stringify(values)}\n`; - } - } - return result; - } - */ - function asciiCallContextSubHit(formatter: OutputFormatter, results: CallContextQuerySubKindResult[], processed: PipelineOutput): string { const result: string[] = []; for(const { id, calls = [], linkedIds = [] } of results) { @@ -111,7 +98,7 @@ function asciiCallContext(formatter: 
OutputFormatter, results: QueryResults<'cal return result.join('\n'); } -function asciiSummary(formatter: OutputFormatter, totalInMs: number, results: QueryResults, processed: PipelineOutput): string { +export function asciiSummaryOfQueryResult(formatter: OutputFormatter, totalInMs: number, results: QueryResults, processed: PipelineOutput): string { const result: string[] = []; for(const [query, queryResults] of Object.entries(results)) { @@ -140,7 +127,7 @@ function asciiSummary(formatter: OutputFormatter, totalInMs: number, results: Qu } result.push(italic(`All queries together required ≈${results['.meta'].timing.toFixed(0)}ms (total ${totalInMs.toFixed(0)}ms)`, formatter)); - return result.join('\n'); + return formatter.format(result.join('\n')); } export const queryCommand: ReplCommand = { @@ -153,7 +140,7 @@ export const queryCommand: ReplCommand = { const results = await processQueryArgs(remainingLine, shell, output); const totalEnd = Date.now(); if(results) { - output.stdout(asciiSummary(output.formatter, totalEnd - totalStart, results.query, results.processed)); + output.stdout(asciiSummaryOfQueryResult(output.formatter, totalEnd - totalStart, results.query, results.processed)); } } }; diff --git a/src/documentation/README.md b/src/documentation/README.md index df975852fa..e704b06005 100644 --- a/src/documentation/README.md +++ b/src/documentation/README.md @@ -1,4 +1,4 @@ # flowR Documentation Generation -Files here are used to generate parts of the documentation for the project +Files here (root) are used to generate parts of the documentation for the project The scripts may be invoked with `ts-node` from the root directory. 
diff --git a/src/documentation/doc-util/doc-dfg.ts b/src/documentation/doc-util/doc-dfg.ts new file mode 100644 index 0000000000..0d3af00dea --- /dev/null +++ b/src/documentation/doc-util/doc-dfg.ts @@ -0,0 +1,95 @@ +import type { DataflowGraph } from '../../dataflow/graph/graph'; +import type { RShell } from '../../r-bridge/shell'; +import type { MermaidMarkdownMark } from '../../util/mermaid/dfg'; +import { graphToMermaid } from '../../util/mermaid/dfg'; +import { PipelineExecutor } from '../../core/pipeline-executor'; +import { DEFAULT_DATAFLOW_PIPELINE } from '../../core/steps/pipeline/default-pipelines'; +import { requestFromInput } from '../../r-bridge/retriever'; +import { deterministicCountingIdGenerator } from '../../r-bridge/lang-4.x/ast/model/processing/decorate'; +import { resolveDataflowGraph } from '../../dataflow/graph/resolve-graph'; +import type { DataflowDifferenceReport } from '../../dataflow/graph/diff'; +import { diffOfDataflowGraphs } from '../../dataflow/graph/diff'; +import { guard } from '../../util/assert'; +import { printAsMs } from './doc-ms'; + +function printDfGraph(graph: DataflowGraph, mark?: ReadonlySet) { + return ` +\`\`\`mermaid +${graphToMermaid({ + graph, + prefix: 'flowchart LR', + mark + }).string} +\`\`\` + `; +} + +export interface PrintDataflowGraphOptions { + readonly mark?: ReadonlySet; + readonly showCode?: boolean; +} +export async function printDfGraphForCode(shell: RShell, code: string, { mark, showCode }: PrintDataflowGraphOptions = {}) { + const now = performance.now(); + const result = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { + shell, + request: requestFromInput(code) + }).allRemainingSteps(); + const duration = performance.now() - now; + + const metaInfo = `The analysis required _${printAsMs(duration)}_ (including parsing and normalization) within the generation environment.`; + + return '\n\n' + '-'.repeat(42) + '\n' + printDfGraph(result.dataflow.graph, mark) + (showCode ? ` +
+ +R Code of the Dataflow Graph + +${metaInfo} +${mark ? `The following marks are used in the graph to highlight sub-parts (uses ids): ${[...mark].join(', ')}.` : ''} + +\`\`\`r +${code} +\`\`\` + +
+ +Mermaid Code (without markings) + +\`\`\` +${graphToMermaid({ + graph: result.dataflow.graph, + prefix: 'flowchart LR' + }).string} +\`\`\` + +
+ +
+ + ` : '\n(' + metaInfo + ')\n\n') + + '-'.repeat(42) + ; +} + + +/** returns resolved expected df graph */ +export async function verifyExpectedSubgraph(shell: RShell, code: string, expectedSubgraph: DataflowGraph): Promise { + /* we verify that we get what we want first! */ + const info = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { + shell, + request: requestFromInput(code), + getId: deterministicCountingIdGenerator(0) + }).allRemainingSteps(); + + expectedSubgraph.setIdMap(info.normalize.idMap); + expectedSubgraph = resolveDataflowGraph(expectedSubgraph); + const report: DataflowDifferenceReport = diffOfDataflowGraphs( + { name: 'expected', graph: expectedSubgraph }, + { name: 'got', graph: info.dataflow.graph }, + { + leftIsSubgraph: true + } + ); + + guard(report.isEqual(), () => `report:\n * ${report.comments()?.join('\n * ') ?? ''}`); + return expectedSubgraph; +} diff --git a/src/documentation/doc-util/doc-files.ts b/src/documentation/doc-util/doc-files.ts new file mode 100644 index 0000000000..ee6842b2b6 --- /dev/null +++ b/src/documentation/doc-util/doc-files.ts @@ -0,0 +1,16 @@ +import fs from 'fs'; + +export const RemoteFlowrFilePathBaseRef = 'https://github.com/flowr-analysis/flowr/tree/main/'; +export const FlowrWikiBaseRef = 'https://github.com/flowr-analysis/flowr/wiki/'; + +export function getFilePathMd(path: string): string { + // we go one up as we are in doc-util now :D #convenience + const fullpath = require.resolve('../' + path); + const relative = fullpath.replace(process.cwd(), '.'); + /* remove project prefix */ + return `[\`${relative}\`](${RemoteFlowrFilePathBaseRef}${relative})`; +} + +export function getFileContent(path: string): string { + return fs.readFileSync(require.resolve(path), 'utf-8'); +} diff --git a/src/documentation/doc-util/doc-ms.ts b/src/documentation/doc-util/doc-ms.ts new file mode 100644 index 0000000000..4cab700dc4 --- /dev/null +++ b/src/documentation/doc-util/doc-ms.ts @@ -0,0 +1,5 @@ + +export 
function printAsMs(ms: number, precision = 2): string { + /* eslint-disable-next-line no-irregular-whitespace*/ + return `${ms.toFixed(precision)} ms`; +} diff --git a/src/documentation/doc-util/doc-query.ts b/src/documentation/doc-util/doc-query.ts new file mode 100644 index 0000000000..56b6f41066 --- /dev/null +++ b/src/documentation/doc-util/doc-query.ts @@ -0,0 +1,72 @@ +import type { RShell } from '../../r-bridge/shell'; +import type { Queries, SupportedQueryTypes } from '../../queries/query'; +import { executeQueries } from '../../queries/query'; +import { PipelineExecutor } from '../../core/pipeline-executor'; +import { DEFAULT_DATAFLOW_PIPELINE } from '../../core/steps/pipeline/default-pipelines'; +import { requestFromInput } from '../../r-bridge/retriever'; +import { printAsMs } from './doc-ms'; +import { jsonReplacer } from '../../util/json'; +import { markdownFormatter } from '../../util/ansi'; +import { asciiSummaryOfQueryResult } from '../../cli/repl/commands/repl-query'; +import { FlowrWikiBaseRef } from './doc-files'; + +export interface ShowQueryOptions { + readonly showCode?: boolean; +} + +export async function showQuery(shell: RShell, code: string, queries: Queries, { showCode }: ShowQueryOptions = {}): Promise { + const now = performance.now(); + const analysis = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { + shell, + request: requestFromInput(code) + }).allRemainingSteps(); + const results = executeQueries({ graph: analysis.dataflow.graph, ast: analysis.normalize }, queries); + const duration = performance.now() - now; + + const metaInfo = ` +The analysis required _${printAsMs(duration)}_ (including parsing and normalization and the query) within the generation environment. 
+ `.trim(); + + const resultAsString = JSON.stringify(results, jsonReplacer, 2); + + return ` + +\`\`\`json +${JSON.stringify(queries, jsonReplacer, 2)} +\`\`\` + +Results (prettified and summarized): + +${ + asciiSummaryOfQueryResult(markdownFormatter, duration, results, analysis) +} + + +
Show Detailed Results as Json + +${metaInfo} + +In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. +Please consult the [Interface](${FlowrWikiBaseRef}/Interface) wiki page for more information on how to get those. + +\`\`\`json +${resultAsString} +\`\`\` + +${ + showCode ? ` +
Original Code + +\`\`\`r +${code} +\`\`\` + +
+ ` : '' +} + +
+ + `; + +} diff --git a/src/documentation/print-dataflow-graph-wiki.ts b/src/documentation/print-dataflow-graph-wiki.ts index e07509f71a..8f1bf25169 100644 --- a/src/documentation/print-dataflow-graph-wiki.ts +++ b/src/documentation/print-dataflow-graph-wiki.ts @@ -1,94 +1,25 @@ import { DataflowGraph } from '../dataflow/graph/graph'; import type { MermaidMarkdownMark } from '../util/mermaid/dfg'; -import { graphToMermaid } from '../util/mermaid/dfg'; import { flowrVersion } from '../util/version'; -import { PipelineExecutor } from '../core/pipeline-executor'; -import { DEFAULT_DATAFLOW_PIPELINE } from '../core/steps/pipeline/default-pipelines'; -import { requestFromInput } from '../r-bridge/retriever'; import { RShell } from '../r-bridge/shell'; import { VertexType } from '../dataflow/graph/vertex'; import { EdgeType } from '../dataflow/graph/edge'; import { emptyGraph } from '../dataflow/graph/dataflowgraph-builder'; -import { deterministicCountingIdGenerator } from '../r-bridge/lang-4.x/ast/model/processing/decorate'; -import { resolveDataflowGraph } from '../dataflow/graph/resolve-graph'; -import type { DataflowDifferenceReport } from '../dataflow/graph/diff'; -import { diffOfDataflowGraphs } from '../dataflow/graph/diff'; import { guard } from '../util/assert'; import { defaultEnv } from '../../test/functionality/_helper/dataflow/environment-builder'; import { setMinLevelOfAllLogs } from '../../test/functionality/_helper/log'; import { LogLevel } from '../util/log'; +import { printDfGraphForCode, verifyExpectedSubgraph } from './doc-util/doc-dfg'; +import { getFilePathMd } from './doc-util/doc-files'; - -const baseRef = 'https://github.com/flowr-analysis/flowr/tree/main/'; - -function getFilePathMd(path: string): string { - const fullpath = require.resolve(path); - const relative = fullpath.replace(process.cwd(), '.'); - /* remove project prefix */ - return `[\`${relative}\`](${baseRef}${relative})`; -} - -function printDfGraph(graph: DataflowGraph, mark?: 
ReadonlySet) { - return ` -\`\`\`mermaid -${graphToMermaid({ - graph, - prefix: 'flowchart LR', - mark - }).string} -\`\`\` - `; -} - -async function printDfGraphForCode(shell: RShell, code: string, mark?: ReadonlySet) { - const now = performance.now(); - const result = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { - shell, - request: requestFromInput(code) - }).allRemainingSteps(); - const duration = performance.now() - now; - - return '\n\n' + '-'.repeat(42) + '\n' + printDfGraph(result.dataflow.graph, mark) + ` -
- -R Code of the Dataflow Graph - -${'' /* eslint-disable-next-line no-irregular-whitespace*/} -The analysis required _${duration.toFixed(2)} ms_ (including parsing and normalization) within the generation environment. -${mark ? `The following marks are used in the graph to highlight sub-parts (uses ids): ${[...mark].join(', ')}.` : ''} - -\`\`\`r -${code} -\`\`\` - -
- -Mermaid Code (without markings) - -\`\`\` -${graphToMermaid({ - graph: result.dataflow.graph, - prefix: 'flowchart LR' - }).string} -\`\`\` - -
- -
- -${'-'.repeat(42)} - - `; -} - -interface SubExplanationParameters { +export interface SubExplanationParameters { readonly name: string, readonly description: string, readonly code: string, readonly expectedSubgraph: DataflowGraph } -interface ExplanationParameters { +export interface ExplanationParameters { readonly shell: RShell, readonly name: string, readonly type: VertexType | EdgeType, @@ -105,28 +36,6 @@ function getAllEdges(): [string, EdgeType][] { return Object.entries(EdgeType).filter(([,v]) => Number.isInteger(v)) as [string, EdgeType][]; } -/** returns resolved expected df graph */ -async function verifyExpectedSubgraph(shell: RShell, code: string, expectedSubgraph: DataflowGraph): Promise { - /* we verify that we get what we want first! */ - const info = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { - shell, - request: requestFromInput(code), - getId: deterministicCountingIdGenerator(0) - }).allRemainingSteps(); - - expectedSubgraph.setIdMap(info.normalize.idMap); - expectedSubgraph = resolveDataflowGraph(expectedSubgraph); - const report: DataflowDifferenceReport = diffOfDataflowGraphs( - { name: 'expected', graph: expectedSubgraph }, - { name: 'got', graph: info.dataflow.graph }, - { - leftIsSubgraph: true - } - ); - - guard(report.isEqual(), () => `report:\n * ${report.comments()?.join('\n * ') ?? 
''}`); - return expectedSubgraph; -} async function subExplanation(shell: RShell, { description, code, expectedSubgraph }: SubExplanationParameters): Promise { expectedSubgraph = await verifyExpectedSubgraph(shell, code, expectedSubgraph); @@ -142,7 +51,7 @@ async function subExplanation(shell: RShell, { description, code, expectedSubgra } return ` -${await printDfGraphForCode(shell, code, new Set(marks))} +${await printDfGraphForCode(shell, code, { mark: new Set(marks) })} ${description}`; @@ -430,6 +339,7 @@ if(require.main === module) { const shell = new RShell(); void getText(shell).then(str => { console.log(str); + }).finally(() => { shell.close(); }); } diff --git a/src/documentation/print-query-wiki.ts b/src/documentation/print-query-wiki.ts new file mode 100644 index 0000000000..3447eb0c42 --- /dev/null +++ b/src/documentation/print-query-wiki.ts @@ -0,0 +1,117 @@ +import { RShell } from '../r-bridge/shell'; +import { flowrVersion } from '../util/version'; +import { printDfGraphForCode } from './doc-util/doc-dfg'; +import { setMinLevelOfAllLogs } from '../../test/functionality/_helper/log'; +import { LogLevel } from '../util/log'; +import { executeQueries } from '../queries/query'; +import { FlowrWikiBaseRef, getFilePathMd } from './doc-util/doc-files'; +import { showQuery } from './doc-util/doc-query'; +import { CallTargets } from '../queries/call-context-query/call-context-query-format'; +import { describeSchema } from '../util/schema'; +import { QueriesSchema } from '../queries/query-schema'; +import { markdownFormatter } from '../util/ansi'; + +const fileCode = ` +library(ggplot) +library(dplyr) +library(readr) + +# read data with read_csv +data <- read_csv('data.csv') +data2 <- read_csv('data2.csv') + +mean <- mean(data$x) +print(mean) + +data %>% + ggplot(aes(x = x, y = y)) + + geom_point() + +print(mean(data2$k)) +`.trim(); + +async function getText(shell: RShell) { + const rversion = (await shell.usedRVersion())?.format() ?? 
'unknown'; + const currentDateAndTime = new Date().toISOString().replace('T', ', ').replace(/\.\d+Z$/, ' UTC'); + return `_This document was generated automatically from '${module.filename}' on ${currentDateAndTime} presenting an overview of flowR's dataflow graph (version: ${flowrVersion().format()}, samples generated with R version ${rversion})._ + +This page briefly summarizes flowR's query API, represented by the ${executeQueries.name} function in ${getFilePathMd('../queries/query.ts')}. +Please see the [Interface](${FlowrWikiBaseRef}/Interface) wiki page for more information on how to access this API (TODO TODO TODO). + +First, consider that you have a file like the following (of course, this is just a simple and artificial example): + +\`\`\`r +${fileCode} +\`\`\` + +
Dataflow Graph of the Example + +${await printDfGraphForCode(shell, fileCode, { showCode: false })} + +
+ +  + +Additionally, consider that you are interested in all function calls which loads data with \`read_csv\`. +A simple \`regex\`-based query could look like this: \`^read_csv$\`. +However, this fails to incorporate + +1. Syntax-based information (comments, strings, used as a variable, called as a higher-order function, ...) +2. Semantic information (e.g., \`read_csv\` is overwritten by a function with the same name) +3. Context information (e.g., calls like \`points\` may link to the current plot) + +To solve this, flowR provides a query API which allows you to specify queries on the dataflow graph. +For the specific use-case stated, you could use the [Call-Context Query](#call-context-query) to find all calls to \`read_csv\` which refer functions that are not overwritten. + +Just as an example, the following [Call-Context Query](#call-context-query) finds all calls to \`read_csv\` that are not overwritten: + +${await showQuery(shell, fileCode, [{ type: 'call-context', callName: '^read_csv$', callTargets: CallTargets.OnlyGlobal, kind: 'input', subkind: 'csv-file' }], { showCode: false })} + +## The Query Format + +Queries are JSON arrays of query objects, each of which uses a \`type\` property to specify the query type. + +The following query types are currently supported: + +${'' /* TODO: automate */} +1. [Call-Context Query](#call-context-query) +2. [Compound Query (virtual)](#compound-query) + +TODO TOOD TODO get thef format to work + + +
+ + +Detailed Query Format + +${ + describeSchema(QueriesSchema, markdownFormatter) +} + +
+ +### Supported Queries + +#### Call-Context Query + +### Supported Virtual Queries + +#### Compound Query + + + +`; +} + +/** if we run this script, we want a Markdown representation of the capabilities */ +if(require.main === module) { + setMinLevelOfAllLogs(LogLevel.Fatal); + + const shell = new RShell(); + void getText(shell).then(str => { + console.log(str); + }).finally(() => { + shell.close(); + }); +} diff --git a/src/queries/call-context-query/call-context-query-executor.ts b/src/queries/call-context-query/call-context-query-executor.ts index 7d21281f4b..fa3f6aee58 100644 --- a/src/queries/call-context-query/call-context-query-executor.ts +++ b/src/queries/call-context-query/call-context-query-executor.ts @@ -53,9 +53,13 @@ function satisfiesCallTargets(id: NodeId, graph: DataflowGraph, callTarget: Call case CallTargets.Any: return callTargets; case CallTargets.OnlyGlobal: - return builtIn && callTargets.length === 0 ? [BuiltIn] : 'no'; + if(callTargets.length === 0) { + return builtIn ? [BuiltIn] : []; + } else { + return 'no'; + } case CallTargets.MustIncludeGlobal: - return builtIn ? [...callTargets, BuiltIn] : 'no'; + return builtIn || callTargets.length === 0 ? [...callTargets, BuiltIn] : 'no'; case CallTargets.OnlyLocal: return !builtIn && callTargets.length > 0 ? 
callTargets : 'no'; case CallTargets.MustIncludeLocal: diff --git a/src/util/ansi.ts b/src/util/ansi.ts index e44ef750d6..64b6089f88 100644 --- a/src/util/ansi.ts +++ b/src/util/ansi.ts @@ -56,6 +56,29 @@ export const voidFormatter: OutputFormatter = new class implements OutputFormatt } }(); +export const markdownFormatter: OutputFormatter = new class implements OutputFormatter { + public format(input: string, options?: FormatOptions): string { + if(options && 'style' in options) { + if(options.style === FontStyles.Bold) { + input = `**${input}**`; + } else if(options.style === FontStyles.Italic) { + input = `_${input}_`; + } else { + throw new Error(`Unsupported font style: ${options.style}`); + } + } + return input.replaceAll(/\n/g, '\\\n').replaceAll(/ /g, ' '); + } + + public getFormatString(_options?: FormatOptions): string { + return ''; + } + + public reset(): string { + return ''; + } +}(); + /** * This does not work if the {@link setFormatter|formatter} is void. Tries to format the text with a bold font weight. 
*/ diff --git a/test/functionality/dataflow/query/call-context-query-tests.ts b/test/functionality/dataflow/query/call-context-query-tests.ts index 7957852579..463e891e8e 100644 --- a/test/functionality/dataflow/query/call-context-query-tests.ts +++ b/test/functionality/dataflow/query/call-context-query-tests.ts @@ -71,6 +71,7 @@ describe('Call Context Query', withShell(shell => { testQuery('Multiple wanted Calls', 'print(1); print(2)', [q(/print/)], r([{ id: 3 }, { id: 7 }])); testQuery('Print calls (global)', 'print(1)', [q(/print/, { callTargets: CallTargets.OnlyGlobal })], r([{ id: 3, calls: [BuiltIn] }])); testQuery('Higher-Order Calls', 'lapply(c(1,2,3),print)', [q(/print/)], r([{ id: 10 }])); + testQuery('Print calls (global)', 'read_csv(x)', [q(/read_csv/, { callTargets: CallTargets.OnlyGlobal })], r([{ id: 3, calls: [] }])); }); describe('Mixed Targets', () => { const code = 'if(x) { print <- function() {} }\nprint()'; diff --git a/wiki/Query API.md b/wiki/Query API.md new file mode 100644 index 0000000000..58eb4e9cfd --- /dev/null +++ b/wiki/Query API.md @@ -0,0 +1,354 @@ +_This document was generated automatically from '/home/limerent/GitHub/phd/flowr/src/documentation/print-query-wiki.ts' on 2024-09-22, 12:50:06 UTC presenting an overview of flowR's dataflow graph (version: 2.0.25, samples generated with R version 4.4.1)._ + +This page briefly summarizes flowR's query API, represented by the executeQueries function in [`./src/queries/query.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/query.ts). +Please see the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to access this API (TODO TODO TODO). 
+ +First, consider that you have a file like the following (of course, this is just a simple and artificial example): + +```r +library(ggplot) +library(dplyr) +library(readr) + +# read data with read_csv +data <- read_csv('data.csv') +data2 <- read_csv('data2.csv') + +mean <- mean(data$x) +print(mean) + +data %>% + ggplot(aes(x = x, y = y)) + + geom_point() + +print(mean(data2$k)) +``` + +
Dataflow Graph of the Example + + + +------------------------------------------ + +```mermaid +flowchart LR + 1{{"`#91;RSymbol#93; ggplot + (1) + *1.9-14*`"}} + 3[["`#91;RFunctionCall#93; library + (3) + *1.1-15* + (1)`"]] + 5{{"`#91;RSymbol#93; dplyr + (5) + *2.9-13*`"}} + 7[["`#91;RFunctionCall#93; library + (7) + *2.1-14* + (5)`"]] + 9{{"`#91;RSymbol#93; readr + (9) + *3.9-13*`"}} + 11[["`#91;RFunctionCall#93; library + (11) + *3.1-14* + (9)`"]] + 14{{"`#91;RString#93; 'data.csv' + (14) + *6.18-27*`"}} + 16[["`#91;RFunctionCall#93; read#95;csv + (16) + *6.9-28* + (14)`"]] + 12["`#91;RSymbol#93; data + (12) + *6.1-4*`"] + 17[["`#91;RBinaryOp#93; #60;#45; + (17) + *6.1-28* + (12, 16)`"]] + 20{{"`#91;RString#93; 'data2.csv' + (20) + *7.19-29*`"}} + %% Environment of 22 [level: 0]: + %% Built-in + %% 15---------------------------------------- + %% data: {data (12, variable, def. @17)} + 22[["`#91;RFunctionCall#93; read#95;csv + (22) + *7.10-30* + (20)`"]] + 18["`#91;RSymbol#93; data2 + (18) + *7.1-5*`"] + 23[["`#91;RBinaryOp#93; #60;#45; + (23) + *7.1-30* + (18, 22)`"]] + 26(["`#91;RSymbol#93; data + (26) + *9.14-17*`"]) + 27{{"`#91;RSymbol#93; x + (27) + *9.14-19*`"}} + 29[["`#91;RAccess#93; $ + (29) + *9.14-19* + (26, 27)`"]] + 31[["`#91;RFunctionCall#93; mean + (31) + *9.9-20* + (29)`"]] + 24["`#91;RSymbol#93; mean + (24) + *9.1-4*`"] + 32[["`#91;RBinaryOp#93; #60;#45; + (32) + *9.1-20* + (24, 31)`"]] + 34(["`#91;RSymbol#93; mean + (34) + *10.7-10*`"]) + 36[["`#91;RFunctionCall#93; print + (36) + *10.1-11* + (34)`"]] + 38(["`#91;RSymbol#93; data + (38) + *12.1-4*`"]) + 43(["`#91;RSymbol#93; x + (43) + *13.24*`"]) + 44(["`#91;RArgument#93; x + (44) + *13.20*`"]) + 46(["`#91;RSymbol#93; y + (46) + *13.31*`"]) + 47(["`#91;RArgument#93; y + (47) + *13.27*`"]) + %% Environment of 48 [level: 0]: + %% Built-in + %% 37---------------------------------------- + %% data: {data (12, variable, def. @17)} + %% data2: {data2 (18, variable, def. 
@23)} + %% mean: {mean (24, variable, def. @32)} + 48[["`#91;RFunctionCall#93; aes + (48) + *13.16-32* + (x (44), y (47))`"]] + %% Environment of 50 [level: 0]: + %% Built-in + %% 37---------------------------------------- + %% data: {data (12, variable, def. @17)} + %% data2: {data2 (18, variable, def. @23)} + %% mean: {mean (24, variable, def. @32)} + 50[["`#91;RFunctionCall#93; ggplot + (50) + *13.9-33* + (38, 48)`"]] + 52[["`#91;RFunctionCall#93; data %#62;% + ggplot(aes(x = x, y = y)) + (52) + *12.6-8* + (38, 50)`"]] + %% Environment of 54 [level: 0]: + %% Built-in + %% 47---------------------------------------- + %% data: {data (12, variable, def. @17)} + %% data2: {data2 (18, variable, def. @23)} + %% mean: {mean (24, variable, def. @32)} + 54[["`#91;RFunctionCall#93; geom#95;point + (54) + *14.9-20*`"]] + 55[["`#91;RBinaryOp#93; #43; + (55) + *12.1-14.20* + (52, 54)`"]] + 58(["`#91;RSymbol#93; data2 + (58) + *16.12-16*`"]) + 59{{"`#91;RSymbol#93; k + (59) + *16.12-18*`"}} + 61[["`#91;RAccess#93; $ + (61) + *16.12-18* + (58, 59)`"]] + %% Environment of 63 [level: 0]: + %% Built-in + %% 48---------------------------------------- + %% data: {data (12, variable, def. @17)} + %% data2: {data2 (18, variable, def. @23)} + %% mean: {mean (24, variable, def. 
@32)} + 63[["`#91;RFunctionCall#93; mean + (63) + *16.7-19* + (61)`"]] + 65[["`#91;RFunctionCall#93; print + (65) + *16.1-20* + (63)`"]] + 3 -->|"argument"| 1 + 7 -->|"argument"| 5 + 11 -->|"argument"| 9 + 16 -->|"argument"| 14 + 12 -->|"defined-by"| 16 + 12 -->|"defined-by"| 17 + 17 -->|"argument"| 16 + 17 -->|"returns, argument"| 12 + 22 -->|"argument"| 20 + 18 -->|"defined-by"| 22 + 18 -->|"defined-by"| 23 + 23 -->|"argument"| 22 + 23 -->|"returns, argument"| 18 + 26 -->|"reads"| 12 + 29 -->|"reads, returns, argument"| 26 + 29 -->|"reads, argument"| 27 + 31 -->|"reads, argument"| 29 + 24 -->|"defined-by"| 31 + 24 -->|"defined-by"| 32 + 32 -->|"argument"| 31 + 32 -->|"returns, argument"| 24 + 34 -->|"reads"| 24 + 36 -->|"reads, returns, argument"| 34 + 38 -->|"reads"| 12 + 44 -->|"reads"| 43 + 47 -->|"reads"| 46 + 48 -->|"reads"| 43 + 48 -->|"argument"| 44 + 48 -->|"reads"| 46 + 48 -->|"argument"| 47 + 50 -->|"reads, argument"| 48 + 50 -->|"argument"| 38 + 52 -->|"argument"| 38 + 52 -->|"argument"| 50 + 55 -->|"reads, argument"| 52 + 55 -->|"reads, argument"| 54 + 58 -->|"reads"| 18 + 61 -->|"reads, returns, argument"| 58 + 61 -->|"reads, argument"| 59 + 63 -->|"reads"| 24 + 63 -->|"argument"| 61 + 65 -->|"reads, returns, argument"| 63 +``` + +(The analysis required _16.82 ms_ (including parsing and normalization) within the generation environment.) + +------------------------------------------ + +
+ +  + +Additionally, consider that you are interested in all function calls which loads data with `read_csv`. +A simple `regex`-based query could look like this: `^read_csv$`. +However, this fails to incorporate + +1. Syntax-based information (comments, strings, used as a variable, called as a higher-order function, ...) +2. Semantic information (e.g., `read_csv` is overwritten by a function with the same name) +3. Context information (e.g., calls like `points` may link to the current plot) + +To solve this, flowR provides a query API which allows you to specify queries on the dataflow graph. +For the specific use-case stated, you could use the [Call-Context Query](#call-context-query) to find all calls to `read_csv` which refer functions that are not overwritten. + +Just as an example, the following [Call-Context Query](#call-context-query) finds all calls to `read_csv` that are not overwritten: + + + +```json +[ + { + "type": "call-context", + "callName": "^read_csv$", + "callTargets": "global", + "kind": "input", + "subkind": "csv-file" + } +] +``` + +Results (prettified and summarized): + +Query: **call-context** (0ms)\ +   ╰ **input**\ +     ╰ **csv-file**: **read_csv** (L.6), **read_csv** (L.7)\ +_All queries together required ≈1ms (total 7ms)_ + + +
Show Detailed Results as Json + +The analysis required _6.96 ms_ (including parsing and normalization and the query) within the generation environment. + +In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. +Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. + +```json +{ + "call-context": { + ".meta": { + "timing": 0 + }, + "kinds": { + "input": { + "subkinds": { + "csv-file": [ + { + "id": 16, + "calls": [] + }, + { + "id": 22, + "calls": [] + } + ] + } + } + } + }, + ".meta": { + "timing": 1 + } +} +``` + + + +
+ + + +## The Query Format + +Queries are JSON arrays of query objects, each of which uses a `type` property to specify the query type. + +The following query types are currently supported: + + +1. [Call-Context Query](#call-context-query) +2. [Compound Query (virtual)](#compound-query) + +TODO TOOD TODO get thef format to work + + +
+ + +Detailed Query Format + +- **.** array + +
+ +### Supported Queries + +#### Call-Context Query + +### Supported Virtual Queries + +#### Compound Query + + + + diff --git a/wiki/_Sidebar.md b/wiki/_Sidebar.md index 0bfbc8182f..0531fd9ff7 100644 --- a/wiki/_Sidebar.md +++ b/wiki/_Sidebar.md @@ -2,6 +2,7 @@ * [Setup](https://github.com/flowr-analysis/flowr/wiki/Setup) * [Overview](https://github.com/flowr-analysis/flowr/wiki/Overview) * [Interface](https://github.com/flowr-analysis/flowr/wiki/Interface) + * [Query API](https://github.com/flowr-analysis/flowr/wiki/Query-API) * [Core](https://github.com/flowr-analysis/flowr/wiki/Core) * [Normalized AST](https://github.com/flowr-analysis/flowr/wiki/Normalized-AST) * [Dataflow Graph](https://github.com/flowr-analysis/flowr/wiki/Dataflow-Graph) From ed95ab008e501671c39a25da39f94f49f459a4e8 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Sun, 22 Sep 2024 21:45:54 +0200 Subject: [PATCH 24/41] doc(query): extend query documentation --- src/cli/repl/commands/repl-query.ts | 23 +- src/documentation/doc-util/doc-files.ts | 4 - src/documentation/doc-util/doc-query.ts | 84 ++- .../print-dataflow-graph-wiki.ts | 2 +- src/documentation/print-query-wiki.ts | 147 +++- src/queries/query.ts | 1 - src/queries/virtual-query/virtual-queries.ts | 2 + src/util/ansi.ts | 1 + src/util/schema.ts | 9 + .../query/call-context-query-tests.ts | 6 +- wiki/Query API.md | 655 ++++++++++++++++-- 11 files changed, 834 insertions(+), 100 deletions(-) diff --git a/src/cli/repl/commands/repl-query.ts b/src/cli/repl/commands/repl-query.ts index a5f6179ea4..b0d4dd2415 100644 --- a/src/cli/repl/commands/repl-query.ts +++ b/src/cli/repl/commands/repl-query.ts @@ -15,6 +15,8 @@ import type { PipelineOutput } from '../../../core/steps/pipeline/pipeline'; import type { BaseQueryMeta } from '../../../queries/base-query-format'; import { jsonReplacer } from '../../../util/json'; import { AnyQuerySchema, QueriesSchema } from '../../../queries/query-schema'; +import type { NodeId } from 
'../../../r-bridge/lang-4.x/ast/model/processing/node-id'; +import { BuiltIn } from '../../../dataflow/environments/built-in'; async function getDataflow(shell: RShell, remainingLine: string) { return await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { @@ -66,7 +68,18 @@ async function processQueryArgs(line: string, shell: RShell, output: ReplOutput) }; } -function asciiCallContextSubHit(formatter: OutputFormatter, results: CallContextQuerySubKindResult[], processed: PipelineOutput): string { +function nodeString(id: NodeId, formatter: OutputFormatter, processed: PipelineOutput): string { + if(id === BuiltIn) { + return italic('built-in', formatter); + } + const node = processed.normalize.idMap.get(id); + if(node === undefined) { + return `UNKNOWN: ${id}`; + } + return `${italic('`' + (node.lexeme ?? node.info.fullLexeme ?? 'UNKNOWN') + '`', formatter)} (L.${node.location?.[0]})`; +} + +function asciiCallContextSubHit(formatter: OutputFormatter, results: readonly CallContextQuerySubKindResult[], processed: PipelineOutput): string { const result: string[] = []; for(const { id, calls = [], linkedIds = [] } of results) { const node = processed.normalize.idMap.get(id); @@ -74,12 +87,12 @@ function asciiCallContextSubHit(formatter: OutputFormatter, results: CallContext result.push(` ${bold('UNKNOWN: ' + JSON.stringify({ calls, linkedIds }))}`); continue; } - let line = `${bold(node.lexeme ?? node.info.fullLexeme ?? 'UNKKNOWN', formatter)} (L.${node.location?.[0]})`; + let line = nodeString(id, formatter, processed); if(calls.length > 0) { - line += ` ${calls.length} calls`; + line += ` with ${calls.length} call${calls.length > 1 ? 's' : ''} (${calls.map(c => nodeString(c, formatter, processed)).join(', ')})`; } if(linkedIds.length > 0) { - line += ` ${linkedIds.length} links`; + line += ` with ${linkedIds.length} link${linkedIds.length > 1 ? 
's' : ''} (${linkedIds.map(c => nodeString(c, formatter, processed)).join(', ')})`; } result.push(line); } @@ -126,7 +139,7 @@ export function asciiSummaryOfQueryResult(formatter: OutputFormatter, totalInMs: result.push(` - Took ${timing.toFixed(0)}ms`); } - result.push(italic(`All queries together required ≈${results['.meta'].timing.toFixed(0)}ms (total ${totalInMs.toFixed(0)}ms)`, formatter)); + result.push(italic(`All queries together required ≈${results['.meta'].timing.toFixed(0)}ms (1ms accuracy, total ${totalInMs.toFixed(0)}ms)`, formatter)); return formatter.format(result.join('\n')); } diff --git a/src/documentation/doc-util/doc-files.ts b/src/documentation/doc-util/doc-files.ts index ee6842b2b6..7b14f40ac1 100644 --- a/src/documentation/doc-util/doc-files.ts +++ b/src/documentation/doc-util/doc-files.ts @@ -1,4 +1,3 @@ -import fs from 'fs'; export const RemoteFlowrFilePathBaseRef = 'https://github.com/flowr-analysis/flowr/tree/main/'; export const FlowrWikiBaseRef = 'https://github.com/flowr-analysis/flowr/wiki/'; @@ -11,6 +10,3 @@ export function getFilePathMd(path: string): string { return `[\`${relative}\`](${RemoteFlowrFilePathBaseRef}${relative})`; } -export function getFileContent(path: string): string { - return fs.readFileSync(require.resolve(path), 'utf-8'); -} diff --git a/src/documentation/doc-util/doc-query.ts b/src/documentation/doc-util/doc-query.ts index 56b6f41066..4f86b4bab6 100644 --- a/src/documentation/doc-util/doc-query.ts +++ b/src/documentation/doc-util/doc-query.ts @@ -1,5 +1,5 @@ import type { RShell } from '../../r-bridge/shell'; -import type { Queries, SupportedQueryTypes } from '../../queries/query'; +import type { Queries, QueryResults, SupportedQueryTypes } from '../../queries/query'; import { executeQueries } from '../../queries/query'; import { PipelineExecutor } from '../../core/pipeline-executor'; import { DEFAULT_DATAFLOW_PIPELINE } from '../../core/steps/pipeline/default-pipelines'; @@ -8,13 +8,19 @@ import { printAsMs } 
from './doc-ms'; import { jsonReplacer } from '../../util/json'; import { markdownFormatter } from '../../util/ansi'; import { asciiSummaryOfQueryResult } from '../../cli/repl/commands/repl-query'; -import { FlowrWikiBaseRef } from './doc-files'; +import { FlowrWikiBaseRef, getFilePathMd } from './doc-files'; +import type { SupportedVirtualQueryTypes } from '../../queries/virtual-query/virtual-queries'; +import type { VirtualCompoundConstraint } from '../../queries/virtual-query/compound-query'; export interface ShowQueryOptions { - readonly showCode?: boolean; + readonly showCode?: boolean; + readonly collapseResult?: boolean; } -export async function showQuery(shell: RShell, code: string, queries: Queries, { showCode }: ShowQueryOptions = {}): Promise { +export async function showQuery< + Base extends SupportedQueryTypes, + VirtualArguments extends VirtualCompoundConstraint = VirtualCompoundConstraint +>(shell: RShell, code: string, queries: Queries, { showCode, collapseResult }: ShowQueryOptions = {}): Promise { const now = performance.now(); const analysis = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { shell, @@ -35,13 +41,14 @@ The analysis required _${printAsMs(duration)}_ (including parsing and normalizat ${JSON.stringify(queries, jsonReplacer, 2)} \`\`\` -Results (prettified and summarized): +${collapseResult ? '
Show Results' : ''} + +_Results (prettified and summarized):_ ${ - asciiSummaryOfQueryResult(markdownFormatter, duration, results, analysis) + asciiSummaryOfQueryResult(markdownFormatter, duration, results as QueryResults<'call-context'>, analysis) } -
Show Detailed Results as Json ${metaInfo} @@ -67,6 +74,69 @@ ${code}
+${collapseResult ? '
' : ''} + `; } + +export interface QueryDocumentation { + readonly name: string; + readonly type: 'virtual' | 'active'; + readonly shortDescription: string; + readonly functionName: string; + readonly functionFile: string; + readonly buildExplanation: (shell: RShell) => Promise; +} + +export const RegisteredQueries = { + 'active': new Map(), + 'virtual': new Map() +}; + +export function registerQueryDocumentation(query: SupportedQueryTypes | SupportedVirtualQueryTypes, doc: QueryDocumentation) { + const map = RegisteredQueries[doc.type]; + if(map.has(query)) { + throw new Error(`Query ${query} already registered`); + } + map.set(query, doc); +} + +function linkify(name: string) { + return name.toLowerCase().replace(/ /g, '-'); +} + +export function tocForQueryType(type: 'active' | 'virtual') { + const queries = RegisteredQueries[type]; + const result: string[] = []; + for(const [id, { name, shortDescription }] of queries) { + result.push(`1. [${name}](#${linkify(name)}) (\`${id}\`):\\\n ${shortDescription}`); + } + return result.join('\n'); +} + +async function explainQuery(shell: RShell, { name, functionName, functionFile, buildExplanation }: QueryDocumentation) { + return ` +### ${name} + +${await buildExplanation(shell)} + +
+ +Implementation Details + +Responsible for the execution of the ${name} query is \`${functionName}\` in ${getFilePathMd(functionFile)}. + +
+ +`; +} + +export async function explainQueries(shell: RShell, type: 'active' | 'virtual'): Promise { + const queries = RegisteredQueries[type]; + const result: string[] = []; + for(const doc of queries.values()) { + result.push(await explainQuery(shell, doc)); + } + return result.join('\n\n\n'); +} diff --git a/src/documentation/print-dataflow-graph-wiki.ts b/src/documentation/print-dataflow-graph-wiki.ts index 8f1bf25169..4acad263f0 100644 --- a/src/documentation/print-dataflow-graph-wiki.ts +++ b/src/documentation/print-dataflow-graph-wiki.ts @@ -281,7 +281,7 @@ async function getEdgesExplanations(shell: RShell): Promise { async function getText(shell: RShell) { const rversion = (await shell.usedRVersion())?.format() ?? 'unknown'; const currentDateAndTime = new Date().toISOString().replace('T', ', ').replace(/\.\d+Z$/, ' UTC'); - return `_This document was generated automatically from '${module.filename}' on ${currentDateAndTime} presenting an overview of flowR's dataflow graph (version: ${flowrVersion().format()}, samples generated with R version ${rversion})._ + return `_This document was generated automatically from '${module.filename}' on ${currentDateAndTime} presenting an overview of flowR's dataflow graph (version: ${flowrVersion().format()}, samples are generated with R version ${rversion})._ This page briefly summarizes flowR's dataflow graph, represented by ${DataflowGraph.name} in ${getFilePathMd('../dataflow/graph/graph.ts')}. In case you want to manually build such a graph (e.g., for testing), you can use the builder in ${getFilePathMd('../dataflow/graph/dataflowgraph-builder.ts')}. 
diff --git a/src/documentation/print-query-wiki.ts b/src/documentation/print-query-wiki.ts index 3447eb0c42..a0b4e94573 100644 --- a/src/documentation/print-query-wiki.ts +++ b/src/documentation/print-query-wiki.ts @@ -5,11 +5,13 @@ import { setMinLevelOfAllLogs } from '../../test/functionality/_helper/log'; import { LogLevel } from '../util/log'; import { executeQueries } from '../queries/query'; import { FlowrWikiBaseRef, getFilePathMd } from './doc-util/doc-files'; -import { showQuery } from './doc-util/doc-query'; +import { explainQueries, registerQueryDocumentation, showQuery, tocForQueryType } from './doc-util/doc-query'; import { CallTargets } from '../queries/call-context-query/call-context-query-format'; import { describeSchema } from '../util/schema'; import { QueriesSchema } from '../queries/query-schema'; import { markdownFormatter } from '../util/ansi'; +import { executeCallContextQueries } from '../queries/call-context-query/call-context-query-executor'; +import { executeCompoundQueries } from '../queries/virtual-query/compound-query'; const fileCode = ` library(ggplot) @@ -20,20 +22,125 @@ library(readr) data <- read_csv('data.csv') data2 <- read_csv('data2.csv') -mean <- mean(data$x) -print(mean) +m <- mean(data$x) +print(m) data %>% ggplot(aes(x = x, y = y)) + geom_point() +plot(data2$x, data2$y) +points(data2$x, data2$y) + print(mean(data2$k)) `.trim(); + +registerQueryDocumentation('call-context', { + name: 'Call-Context Query', + type: 'active', + shortDescription: 'Finds all calls in a set of files that matches specified criteria.', + functionName: executeCallContextQueries.name, + functionFile: '../queries/call-context-query/call-context-query-executor.ts', + buildExplanation: async(shell: RShell) => { + return ` +Call context queries may be used to identify calls to specific functions that match criteria of your interest. +For now, we support two criteria: + +1. 
**Function Name** (\`callName\`): The function name is specified by a regular expression. This allows you to find all calls to functions that match a specific pattern. +2. **Call Targets** (\`callTargets\`): This specifies what the function call targets. For example, you may want to find all calls to a function that is not defined locally. + +Besides this we provide three ways to automatically categorize and link identified invocations: + +1. **Kind** (\`kind\`): This is a general category that can be used to group calls together. For example, you may want to link all calls to \`plot\` to \`visualize\`. +2. **Subkind** (\`subkind\`): This is used to uniquely identify the respective call type when grouping the output. For example, you may want to link all calls to \`ggplot\` to \`plot\`. +3. **Linked Calls** (\`linkTo\`): This links the current call to the last call of the given kind. This way, you can link a call like \`points\` to the latest graphics plot etc. + For now, we _only offer support for linking to the last call_ as the current flow dependency over-approximation is not stable. + +Re-using the example code from above, the following query attaches all calls to \`mean\` to the kind \`visualize\` and the subkind \`text\`, +all calls that start with \`read_\` to the kind \`input\` but only if they are not locally overwritten, and the subkind \`csv-file\`, and links all calls to \`points\` to the last call to \`plot\`: + +${ + await showQuery(shell, fileCode, [ + { type: 'call-context', callName: '^mean$', kind: 'visualize', subkind: 'text' }, + { type: 'call-context', callName: '^read_', kind: 'input', subkind: 'csv-file', callTargets: CallTargets.OnlyGlobal }, + { type: 'call-context', callName: '^points$', kind: 'visualize', subkind: 'plot', linkTo: { type: 'link-to-last-call', callName: '^plot$' } } + ], { showCode: false }) +} + +As you can see, all kinds and subkinds with the same name are grouped together. 
+Yet, re-stating common arguments and kinds may be cumbersome (although you can already use clever regex patterns). +See the [Compound Query](#compound-query) for a way to structure your queries more compactly if you think it gets too verbose. + + `; + } +}); + +registerQueryDocumentation('compound', { + name: 'Compound Query', + type: 'virtual', + shortDescription: 'Combines multiple queries of the same type into one, specifying common arguments.', + functionName: executeCompoundQueries.name, + functionFile: '../queries/virtual-query/compound-query.ts', + buildExplanation: async(shell: RShell) => { + return ` +A compound query comes in handy whenever we want to state multiple queries of the same type with a set of common arguments. +It offers the following properties of interest: + +1. **Query** (\`query\`): the type of the query that is to be combined. +2. **Common Arguments** (\`commonArguments\`): The arguments that are to be used as defaults for all queries (i.e., any argument the query may have). +3. **Arguments** (\`arguments\`): The other arguments for the individual queries that are to be combined. 
+ +For example, consider the following compound query that combines two call-context queries for \`mean\` and \`print\`, both of which are to be +assigned to the kind \`visualize\` and the subkind \`text\` (using the example code from above): + +${ + await showQuery(shell, fileCode, [{ + type: 'compound', + query: 'call-context', + commonArguments: { kind: 'visualize', subkind: 'text' }, + arguments: [ + { callName: '^mean$' }, + { callName: '^print$' } + ] + }], { showCode: false }) +} + +Of course, in this specific scenario, the following query would be equivalent: + +${ + await showQuery(shell, fileCode, [ + { type: 'call-context', callName: '^(mean|print)$', kind: 'visualize', subkind: 'text' } + ], { showCode: false, collapseResult: true }) +} + +However, compound queries become more useful whenever common arguments can not be expressed as a union in one of their properties. +Additionally, you can still overwrite default arguments. +In the following, we (by default) want all calls to not resolve to a local definition, except for those to \`print\` for which we explicitly +want to resolve to a local definition: + +${ + await showQuery(shell, fileCode, [{ + type: 'compound', + query: 'call-context', + commonArguments: { kind: 'visualize', subkind: 'text', callTargets: CallTargets.OnlyGlobal }, + arguments: [ + { callName: '^mean$' }, + { callName: '^print$', callTargets: CallTargets.OnlyLocal } + ] + }], { showCode: false }) +} + +Now, the results no longer contain calls to \`print\` that are not defined locally. + + `; + } +}); + async function getText(shell: RShell) { const rversion = (await shell.usedRVersion())?.format() ?? 
'unknown'; const currentDateAndTime = new Date().toISOString().replace('T', ', ').replace(/\.\d+Z$/, ' UTC'); - return `_This document was generated automatically from '${module.filename}' on ${currentDateAndTime} presenting an overview of flowR's dataflow graph (version: ${flowrVersion().format()}, samples generated with R version ${rversion})._ + return `_This document was generated automatically from '${module.filename}' on ${currentDateAndTime} presenting an overview of flowR's dataflow graph (version: ${flowrVersion().format()}, samples are generated with R version ${rversion})._ This page briefly summarizes flowR's query API, represented by the ${executeQueries.name} function in ${getFilePathMd('../queries/query.ts')}. Please see the [Interface](${FlowrWikiBaseRef}/Interface) wiki page for more information on how to access this API (TODO TODO TODO). @@ -70,36 +177,34 @@ ${await showQuery(shell, fileCode, [{ type: 'call-context', callName: '^read_csv ## The Query Format Queries are JSON arrays of query objects, each of which uses a \`type\` property to specify the query type. - -The following query types are currently supported: +In general, we separate two types of queries: -${'' /* TODO: automate */} -1. [Call-Context Query](#call-context-query) -2. [Compound Query (virtual)](#compound-query) +1. **Active Queries**: Are exactly what you would expect from a query (e.g., the [Call-Context Query](#call-context-query)). They fetch information from the dataflow graph. +2. **Virtual Queries**: Are used to structure your queries (e.g., the [Compound Query](#compound-query)). -TODO TOOD TODO get thef format to work +We separate these from a concept perspective. +For now, we support the following **active** queries (which we will refer to simply as a \`query\`): +${tocForQueryType('active')} -
+Similarly, we support the following **virtual** queries: +${tocForQueryType('virtual')} -Detailed Query Format - -${ - describeSchema(QueriesSchema, markdownFormatter) -} +
-
-### Supported Queries +Detailed Query Format (Automatically Generated) -#### Call-Context Query +Although it is probably better to consult the detailed explanations below, if you want to have a look at the schema, here is its description: -### Supported Virtual Queries +${describeSchema(QueriesSchema, markdownFormatter)} -#### Compound Query +
+${await explainQueries(shell, 'active')} +${await explainQueries(shell, 'virtual')} `; } diff --git a/src/queries/query.ts b/src/queries/query.ts index 357ccc8ea2..28c4c0292e 100644 --- a/src/queries/query.ts +++ b/src/queries/query.ts @@ -21,7 +21,6 @@ export interface BasicQueryData { /* Each executor receives all queries of its type in case it wants to avoid repeated traversal */ export type QueryExecutor = (data: BasicQueryData, query: readonly Query[]) => Result; - type SupportedQueries = { [QueryType in Query['type']]: QueryExecutor, BaseQueryResult> } diff --git a/src/queries/virtual-query/virtual-queries.ts b/src/queries/virtual-query/virtual-queries.ts index 0ddeac28d4..5fd91b32d1 100644 --- a/src/queries/virtual-query/virtual-queries.ts +++ b/src/queries/virtual-query/virtual-queries.ts @@ -20,3 +20,5 @@ type SupportedVirtualQueries = { export const SupportedVirtualQueries = { 'compound': executeCompoundQueries } as const satisfies SupportedVirtualQueries; + +export type SupportedVirtualQueryTypes = keyof typeof SupportedVirtualQueries; diff --git a/src/util/ansi.ts b/src/util/ansi.ts index 64b6089f88..71d4e124d7 100644 --- a/src/util/ansi.ts +++ b/src/util/ansi.ts @@ -67,6 +67,7 @@ export const markdownFormatter: OutputFormatter = new class implements OutputFor throw new Error(`Unsupported font style: ${options.style}`); } } + return input.replaceAll(/\n/g, '\\\n').replaceAll(/ /g, ' '); } diff --git a/src/util/schema.ts b/src/util/schema.ts index 0dab3d433f..bd7cf83f88 100644 --- a/src/util/schema.ts +++ b/src/util/schema.ts @@ -34,6 +34,15 @@ export function genericDescription(level: number, formatter: OutputFormatter, na ); } break; + case 'array': + if('items' in desc) { + lines.push({ text: 'Valid item types:', level: level }); + lines.push( + ...(desc['items'] as Joi.Description[]) + .flatMap(desc => genericDescription(level + 1, formatter, '.', desc)) + ); + } + break; default: /* specific support for others if needed */ break; diff --git 
a/test/functionality/dataflow/query/call-context-query-tests.ts b/test/functionality/dataflow/query/call-context-query-tests.ts index 463e891e8e..8c7a3cef7d 100644 --- a/test/functionality/dataflow/query/call-context-query-tests.ts +++ b/test/functionality/dataflow/query/call-context-query-tests.ts @@ -44,8 +44,6 @@ function r(results: CallContextQuerySubKindResult[], kind = 'test-kind', subkind }); } -// TODO: documentation -// TODO: add REPL and message describe('Call Context Query', withShell(shell => { function testQuery(name: string, code: string, query: readonly CallContextQuery[], expected: QueryResultsWithoutMeta) { assertQuery(label(name), shell, code, query, expected); @@ -71,7 +69,9 @@ describe('Call Context Query', withShell(shell => { testQuery('Multiple wanted Calls', 'print(1); print(2)', [q(/print/)], r([{ id: 3 }, { id: 7 }])); testQuery('Print calls (global)', 'print(1)', [q(/print/, { callTargets: CallTargets.OnlyGlobal })], r([{ id: 3, calls: [BuiltIn] }])); testQuery('Higher-Order Calls', 'lapply(c(1,2,3),print)', [q(/print/)], r([{ id: 10 }])); - testQuery('Print calls (global)', 'read_csv(x)', [q(/read_csv/, { callTargets: CallTargets.OnlyGlobal })], r([{ id: 3, calls: [] }])); + testQuery('Reading non-built-ins', 'read_csv(x)', [q(/read_csv/, { callTargets: CallTargets.OnlyGlobal })], r([{ id: 3, calls: [] }])); + testQuery('Built-In in Argument', 'print(mean(x))', [q(/mean/, { callTargets: CallTargets.OnlyGlobal })], r([{ id: 4, calls: [BuiltIn] }])); + testQuery('Multiple Built-In in Argument', 'mean(y)\nprint(mean(x))', [q(/mean/, { callTargets: CallTargets.OnlyGlobal })], r([{ id: 3, calls: [BuiltIn] }, { id: 8, calls: [BuiltIn] }])); }); describe('Mixed Targets', () => { const code = 'if(x) { print <- function() {} }\nprint()'; diff --git a/wiki/Query API.md b/wiki/Query API.md index 58eb4e9cfd..bf0e7da76a 100644 --- a/wiki/Query API.md +++ b/wiki/Query API.md @@ -1,4 +1,4 @@ -_This document was generated automatically from 
'/home/limerent/GitHub/phd/flowr/src/documentation/print-query-wiki.ts' on 2024-09-22, 12:50:06 UTC presenting an overview of flowR's dataflow graph (version: 2.0.25, samples generated with R version 4.4.1)._ +_This document was generated automatically from '/home/limerent/GitHub/phd/flowr/src/documentation/print-query-wiki.ts' on 2024-09-22, 19:43:07 UTC presenting an overview of flowR's dataflow graph (version: 2.0.25, samples are generated with R version 4.4.1)._ This page briefly summarizes flowR's query API, represented by the executeQueries function in [`./src/queries/query.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/query.ts). Please see the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to access this API (TODO TODO TODO). @@ -14,13 +14,16 @@ library(readr) data <- read_csv('data.csv') data2 <- read_csv('data2.csv') -mean <- mean(data$x) -print(mean) +m <- mean(data$x) +print(m) data %>% ggplot(aes(x = x, y = y)) + geom_point() +plot(data2$x, data2$y) +points(data2$x, data2$y) + print(mean(data2$k)) ``` @@ -87,31 +90,31 @@ flowchart LR (18, 22)`"]] 26(["`#91;RSymbol#93; data (26) - *9.14-17*`"]) + *9.11-14*`"]) 27{{"`#91;RSymbol#93; x (27) - *9.14-19*`"}} + *9.11-16*`"}} 29[["`#91;RAccess#93; $ (29) - *9.14-19* + *9.11-16* (26, 27)`"]] 31[["`#91;RFunctionCall#93; mean (31) - *9.9-20* + *9.6-17* (29)`"]] - 24["`#91;RSymbol#93; mean + 24["`#91;RSymbol#93; m (24) - *9.1-4*`"] + *9.1*`"] 32[["`#91;RBinaryOp#93; #60;#45; (32) - *9.1-20* + *9.1-17* (24, 31)`"]] - 34(["`#91;RSymbol#93; mean + 34(["`#91;RSymbol#93; m (34) - *10.7-10*`"]) + *10.7*`"]) 36[["`#91;RFunctionCall#93; print (36) - *10.1-11* + *10.1-8* (34)`"]] 38(["`#91;RSymbol#93; data (38) @@ -133,7 +136,7 @@ flowchart LR %% 37---------------------------------------- %% data: {data (12, variable, def. @17)} %% data2: {data2 (18, variable, def. @23)} - %% mean: {mean (24, variable, def. 
@32)} + %% m: {m (24, variable, def. @32)} 48[["`#91;RFunctionCall#93; aes (48) *13.16-32* @@ -143,7 +146,7 @@ flowchart LR %% 37---------------------------------------- %% data: {data (12, variable, def. @17)} %% data2: {data2 (18, variable, def. @23)} - %% mean: {mean (24, variable, def. @32)} + %% m: {m (24, variable, def. @32)} 50[["`#91;RFunctionCall#93; ggplot (50) *13.9-33* @@ -158,7 +161,7 @@ flowchart LR %% 47---------------------------------------- %% data: {data (12, variable, def. @17)} %% data2: {data2 (18, variable, def. @23)} - %% mean: {mean (24, variable, def. @32)} + %% m: {m (24, variable, def. @32)} 54[["`#91;RFunctionCall#93; geom#95;point (54) *14.9-20*`"]] @@ -166,30 +169,78 @@ flowchart LR (55) *12.1-14.20* (52, 54)`"]] - 58(["`#91;RSymbol#93; data2 + 57(["`#91;RSymbol#93; data2 + (57) + *16.6-10*`"]) + 58{{"`#91;RSymbol#93; x (58) - *16.12-16*`"]) - 59{{"`#91;RSymbol#93; k - (59) - *16.12-18*`"}} - 61[["`#91;RAccess#93; $ - (61) - *16.12-18* - (58, 59)`"]] - %% Environment of 63 [level: 0]: + *16.6-12*`"}} + 60[["`#91;RAccess#93; $ + (60) + *16.6-12* + (57, 58)`"]] + 62(["`#91;RSymbol#93; data2 + (62) + *16.15-19*`"]) + 63{{"`#91;RSymbol#93; y + (63) + *16.15-21*`"}} + 65[["`#91;RAccess#93; $ + (65) + *16.15-21* + (62, 63)`"]] + 67[["`#91;RFunctionCall#93; plot + (67) + *16.1-22* + (60, 65)`"]] + 69(["`#91;RSymbol#93; data2 + (69) + *17.8-12*`"]) + 70{{"`#91;RSymbol#93; x + (70) + *17.8-14*`"}} + 72[["`#91;RAccess#93; $ + (72) + *17.8-14* + (69, 70)`"]] + 74(["`#91;RSymbol#93; data2 + (74) + *17.17-21*`"]) + 75{{"`#91;RSymbol#93; y + (75) + *17.17-23*`"}} + 77[["`#91;RAccess#93; $ + (77) + *17.17-23* + (74, 75)`"]] + %% Environment of 79 [level: 0]: %% Built-in - %% 48---------------------------------------- + %% 60---------------------------------------- %% data: {data (12, variable, def. @17)} %% data2: {data2 (18, variable, def. @23)} - %% mean: {mean (24, variable, def. 
@32)} - 63[["`#91;RFunctionCall#93; mean - (63) - *16.7-19* - (61)`"]] - 65[["`#91;RFunctionCall#93; print - (65) - *16.1-20* - (63)`"]] + %% m: {m (24, variable, def. @32)} + 79[["`#91;RFunctionCall#93; points + (79) + *17.1-24* + (72, 77)`"]] + 82(["`#91;RSymbol#93; data2 + (82) + *19.12-16*`"]) + 83{{"`#91;RSymbol#93; k + (83) + *19.12-18*`"}} + 85[["`#91;RAccess#93; $ + (85) + *19.12-18* + (82, 83)`"]] + 87[["`#91;RFunctionCall#93; mean + (87) + *19.7-19* + (85)`"]] + 89[["`#91;RFunctionCall#93; print + (89) + *19.1-20* + (87)`"]] 3 -->|"argument"| 1 7 -->|"argument"| 5 11 -->|"argument"| 9 @@ -226,15 +277,30 @@ flowchart LR 52 -->|"argument"| 50 55 -->|"reads, argument"| 52 55 -->|"reads, argument"| 54 - 58 -->|"reads"| 18 - 61 -->|"reads, returns, argument"| 58 - 61 -->|"reads, argument"| 59 - 63 -->|"reads"| 24 - 63 -->|"argument"| 61 - 65 -->|"reads, returns, argument"| 63 + 57 -->|"reads"| 18 + 60 -->|"reads, returns, argument"| 57 + 60 -->|"reads, argument"| 58 + 62 -->|"reads"| 18 + 65 -->|"reads, returns, argument"| 62 + 65 -->|"reads, argument"| 63 + 67 -->|"reads, argument"| 60 + 67 -->|"reads, argument"| 65 + 69 -->|"reads"| 18 + 72 -->|"reads, returns, argument"| 69 + 72 -->|"reads, argument"| 70 + 74 -->|"reads"| 18 + 77 -->|"reads, returns, argument"| 74 + 77 -->|"reads, argument"| 75 + 79 -->|"reads, argument"| 72 + 79 -->|"reads, argument"| 77 + 82 -->|"reads"| 18 + 85 -->|"reads, returns, argument"| 82 + 85 -->|"reads, argument"| 83 + 87 -->|"reads, argument"| 85 + 89 -->|"reads, returns, argument"| 87 ``` -(The analysis required _16.82 ms_ (including parsing and normalization) within the generation environment.) +(The analysis required _20.28 ms_ (including parsing and normalization) within the generation environment.) 
------------------------------------------ @@ -269,17 +335,18 @@ Just as an example, the following [Call-Context Query](#call-context-query) find ] ``` -Results (prettified and summarized): + + +_Results (prettified and summarized):_ Query: **call-context** (0ms)\    ╰ **input**\ -     ╰ **csv-file**: **read_csv** (L.6), **read_csv** (L.7)\ -_All queries together required ≈1ms (total 7ms)_ - +     ╰ **csv-file**: _`read_csv`_ (L.6), _`read_csv`_ (L.7)\ +_All queries together required ≈1ms (1ms accuracy, total 7ms)_
Show Detailed Results as Json -The analysis required _6.96 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _7.28 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. @@ -317,37 +384,509 @@ Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Int
+ + ## The Query Format Queries are JSON arrays of query objects, each of which uses a `type` property to specify the query type. - -The following query types are currently supported: +In general, we separate two types of queries: + +1. **Active Queries**: Are exactly what you would expect from a query (e.g., the [Call-Context Query](#call-context-query)). They fetch information from the dataflow graph. +2. **Virtual Queries**: Are used to structure your queries (e.g., the [Compound Query](#compound-query)). +We separate these from a concept perspective. +For now, we support the following **active** queries (which we will refer to simply as a `query`): -1. [Call-Context Query](#call-context-query) -2. [Compound Query (virtual)](#compound-query) +1. [Call-Context Query](#call-context-query) (`call-context`):\ + Finds all calls in a set of files that matches specified criteria. -TODO TOOD TODO get thef format to work +Similarly, we support the following **virtual** queries: +1. [Compound Query](#compound-query) (`compound`):\ + Combines multiple queries of the same type into one, specifying common arguments.
-Detailed Query Format +Detailed Query Format (Automatically Generated) + +Although it is probably better to consult the detailed explanations below, if you want to have a look at the scehma, here is its description: - **.** array +Valid item types: + - **.** alternatives + _Any query_ + - **.** alternatives + _Supported queries_ + - **.** object + _Call context query used to find calls in the dataflow graph_ + - **type** string [required] + _The type of the query._ + Allows only the values: 'call-context' + - **callName** string [required] + _Regex regarding the function name!_ + - **kind** string [optional] + _The kind of the call, this can be used to group calls together (e.g., linking `plot` to `visualize`). Defaults to `.`_ + - **subkind** string [optional] + _The subkind of the call, this can be used to uniquely identify the respective call type when grouping the output (e.g., the normalized name, linking `ggplot` to `plot`). Defaults to `.`_ + - **callTargets** string [optional] + _Call targets the function may have. This defaults to `any`. Request this specifically to gain all call targets we can resolve._ + Allows only the values: 'global', 'must-include-global', 'local', 'must-include-local', 'any' + - **linkTo** object [optional] + _Links the current call to the last call of the given kind. This way, you can link a call like `points` to the latest graphics plot etc._ + - **type** string [required] + _The type of the linkTo sub-query._ + Allows only the values: 'link-to-last-call' + - **callName** string [required] + _Regex regarding the function name of the last call. 
Similar to `callName`, strings are interpreted as a regular expression._ + - **.** alternatives + _Virtual queries (used for structure)_ + - **.** object + _Compound query used to combine queries of the same type_ + - **type** string [required] + _The type of the query._ + Allows only the values: 'compound' + - **query** string [required] + _The query to run on the file analysis information._ + - **commonArguments** object [required] + _Common arguments for all queries._ + - **arguments** array [required] + _Arguments for each query._ + Valid item types: + - **.** alternatives + _Supported queries_ + - **.** object + _Call context query used to find calls in the dataflow graph_ + - **type** string [required] + _The type of the query._ + Allows only the values: 'call-context' + - **callName** string [required] + _Regex regarding the function name!_ + - **kind** string [optional] + _The kind of the call, this can be used to group calls together (e.g., linking `plot` to `visualize`). Defaults to `.`_ + - **subkind** string [optional] + _The subkind of the call, this can be used to uniquely identify the respective call type when grouping the output (e.g., the normalized name, linking `ggplot` to `plot`). Defaults to `.`_ + - **callTargets** string [optional] + _Call targets the function may have. This defaults to `any`. Request this specifically to gain all call targets we can resolve._ + Allows only the values: 'global', 'must-include-global', 'local', 'must-include-local', 'any' + - **linkTo** object [optional] + _Links the current call to the last call of the given kind. This way, you can link a call like `points` to the latest graphics plot etc._ + - **type** string [required] + _The type of the linkTo sub-query._ + Allows only the values: 'link-to-last-call' + - **callName** string [required] + _Regex regarding the function name of the last call. Similar to `callName`, strings are interpreted as a regular expression._ + +
+ + +### Call-Context Query + + +Call context queries may be used to identify calls to specific functions that match criteria of your interest. +For now, we support two criteria: + +1. **Function Name** (`callName`): The function name is specified by a regular expression. This allows you to find all calls to functions that match a specific pattern. +2. **Call Targets** (`callTargets`): This specifies what the function call targets. For example, you may want to find all calls to a function that is not defined locally. + +Besides this we provide three ways to automatically categorize and link identified invocations: + +1. **Kind** (`kind`): This is a general category that can be used to group calls together. For example, you may want to link all calls to `plot` to `visualize`. +2. **Subkind** (`subkind`): This is used to uniquely identify the respective call type when grouping the output. For example, you may want to link all calls to `ggplot` to `plot`. +3. **Linked Calls** (`linkTo`): This links the current call to the last call of the given kind. This way, you can link a call like `points` to the latest graphics plot etc. + For now, we _only offer support for linking to the last call_ as the current flow dependency over-approximation is not stable. 
+ +Re-using the example code from above, the following query attaches all calls to `mean` to the kind `visualize` and the subkind `text`, +all calls that start with `read_` to the kind `input` but only if they are not locally overwritten, and the subkind `csv-file`, and links all calls to `points` to the last call to `plot`: + + + +```json +[ + { + "type": "call-context", + "callName": "^mean$", + "kind": "visualize", + "subkind": "text" + }, + { + "type": "call-context", + "callName": "^read_", + "kind": "input", + "subkind": "csv-file", + "callTargets": "global" + }, + { + "type": "call-context", + "callName": "^points$", + "kind": "visualize", + "subkind": "plot", + "linkTo": { + "type": "link-to-last-call", + "callName": "^plot$" + } + } +] +``` + + + +_Results (prettified and summarized):_ + +Query: **call-context** (3ms)\ +   ╰ **input**\ +     ╰ **csv-file**: _`read_csv`_ (L.6), _`read_csv`_ (L.7)\ +   ╰ **visualize**\ +     ╰ **text**: _`mean`_ (L.9), _`mean`_ (L.19)\ +     ╰ **plot**: _`points`_ (L.17) with 1 link (_`plot`_ (L.16))\ +_All queries together required ≈3ms (1ms accuracy, total 12ms)_ + +
Show Detailed Results as Json + +The analysis required _11.60 ms_ (including parsing and normalization and the query) within the generation environment. + +In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. +Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. + +```json +{ + "call-context": { + ".meta": { + "timing": 3 + }, + "kinds": { + "input": { + "subkinds": { + "csv-file": [ + { + "id": 16, + "calls": [] + }, + { + "id": 22, + "calls": [] + } + ] + } + }, + "visualize": { + "subkinds": { + "text": [ + { + "id": 31 + }, + { + "id": 87 + } + ], + "plot": [ + { + "id": 79, + "linkedIds": [ + 67 + ] + } + ] + } + } + } + }, + ".meta": { + "timing": 3 + } +} +``` + + + +
+ + + + + +As you can see, all kinds and subkinds with the same name are grouped together. +Yet, re-stating common arguments and kinds may be cumbersome (although you can already use clever regex patterns). +See the [Compound Query](#compound-query) for a way to structure your queries more compactly if you think it gets too verbose. + + + +
+ +Implementation Details + +Responsible for the execution of the Call-Context Query is `executeCallContextQueries` in [`./src/queries/call-context-query/call-context-query-executor.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/call-context-query/call-context-query-executor.ts). + +
+ + + + +### Compound Query + + +A compound query comes in use, whenever we want to state multiple queries of the same type with a set of common arguments. +It offers the following properties of interest: + +1. **Query** (`query`): the type of the query that is to be combined. +2. **Common Arguments** (`commonArguments`): The arguments that are to be used as defaults for all queries (i.e., any argument the query may have). +3. **Arguments** (`arguments`): The other arguments for the individual queries that are to be combined. + +For example, consider the following compound query that combines two call-context queries for `mean` and `print`, both of which are to be +assigned to the kind `visualize` and the subkind `text` (using the example code from above): + + + +```json +[ + { + "type": "compound", + "query": "call-context", + "commonArguments": { + "kind": "visualize", + "subkind": "text" + }, + "arguments": [ + { + "callName": "^mean$" + }, + { + "callName": "^print$" + } + ] + } +] +``` + + + +_Results (prettified and summarized):_ + +Query: **call-context** (0ms)\ +   ╰ **visualize**\ +     ╰ **text**: _`mean`_ (L.9), _`print`_ (L.10), _`mean`_ (L.19), _`print`_ (L.19)\ +_All queries together required ≈0ms (1ms accuracy, total 6ms)_ + +
Show Detailed Results as Json + +The analysis required _5.56 ms_ (including parsing and normalization and the query) within the generation environment. + +In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. +Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. + +```json +{ + "call-context": { + ".meta": { + "timing": 0 + }, + "kinds": { + "visualize": { + "subkinds": { + "text": [ + { + "id": 31 + }, + { + "id": 36 + }, + { + "id": 87 + }, + { + "id": 89 + } + ] + } + } + } + }, + ".meta": { + "timing": 0 + } +} +``` + +
-### Supported Queries -#### Call-Context Query -### Supported Virtual Queries + + +Of course, in this specific scenario, the following query would be equivalent: + + + +```json +[ + { + "type": "call-context", + "callName": "^(mean|print)$", + "kind": "visualize", + "subkind": "text" + } +] +``` + +
Show Results + +_Results (prettified and summarized):_ + +Query: **call-context** (0ms)\ +   ╰ **visualize**\ +     ╰ **text**: _`mean`_ (L.9), _`print`_ (L.10), _`mean`_ (L.19), _`print`_ (L.19)\ +_All queries together required ≈0ms (1ms accuracy, total 5ms)_ + +
Show Detailed Results as Json + +The analysis required _5.23 ms_ (including parsing and normalization and the query) within the generation environment. + +In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. +Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. + +```json +{ + "call-context": { + ".meta": { + "timing": 0 + }, + "kinds": { + "visualize": { + "subkinds": { + "text": [ + { + "id": 31 + }, + { + "id": 36 + }, + { + "id": 87 + }, + { + "id": 89 + } + ] + } + } + } + }, + ".meta": { + "timing": 0 + } +} +``` + + + +
+ +
+ + + +However, compound queries become more useful whenever common arguments can not be expressed as a union in one of their properties. +Additionally, you can still overwrite default arguments. +In the following, we (by default) want all calls to not resolve to a local definition, except for those to `print` for which we explicitly +want to resolve to a local definition: + + + +```json +[ + { + "type": "compound", + "query": "call-context", + "commonArguments": { + "kind": "visualize", + "subkind": "text", + "callTargets": "global" + }, + "arguments": [ + { + "callName": "^mean$" + }, + { + "callName": "^print$", + "callTargets": "local" + } + ] + } +] +``` + + + +_Results (prettified and summarized):_ + +Query: **call-context** (0ms)\ +   ╰ **visualize**\ +     ╰ **text**: _`mean`_ (L.9) with 1 call (_built-in_), _`mean`_ (L.19) with 1 call (_built-in_)\ +_All queries together required ≈0ms (1ms accuracy, total 5ms)_ + +
Show Detailed Results as Json + +The analysis required _5.09 ms_ (including parsing and normalization and the query) within the generation environment. + +In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. +Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. + +```json +{ + "call-context": { + ".meta": { + "timing": 0 + }, + "kinds": { + "visualize": { + "subkinds": { + "text": [ + { + "id": 31, + "calls": [ + "built-in" + ] + }, + { + "id": 87, + "calls": [ + "built-in" + ] + } + ] + } + } + } + }, + ".meta": { + "timing": 0 + } +} +``` + + + +
+ + + + + +Now, the results no longer contain calls to `print` that are not defined locally. + + + +
+ +Implementation Details + +Responsible for the execution of the Compound Query query is `executeCompoundQueries` in [`./src/queries/virtual-query/compound-query.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/virtual-query/compound-query.ts). -#### Compound Query +
From 7c9193a972635372f2348ce15482b335259cbd28 Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Mon, 23 Sep 2024 09:31:28 +0200 Subject: [PATCH 25/41] refactor: auto generated wiki line --- .github/workflows/broken-links-and-wiki.yaml | 4 +-- package.json | 4 +-- src/documentation/doc-util/doc-auto-gen.ts | 13 ++++++++ .../print-dataflow-graph-wiki.ts | 4 +-- src/documentation/print-query-wiki.ts | 4 +-- wiki/Query API.md | 30 +++++++++---------- 6 files changed, 36 insertions(+), 23 deletions(-) create mode 100644 src/documentation/doc-util/doc-auto-gen.ts diff --git a/.github/workflows/broken-links-and-wiki.yaml b/.github/workflows/broken-links-and-wiki.yaml index 4fed28cc37..b387b99586 100644 --- a/.github/workflows/broken-links-and-wiki.yaml +++ b/.github/workflows/broken-links-and-wiki.yaml @@ -75,8 +75,8 @@ jobs: } update_wiki_page "Capabilities" capabilities-markdown - update_wiki_page "Dataflow Graph" df-graph-wiki-markdown - update_wiki_page "Query API" query-wiki-markdown + update_wiki_page "Dataflow Graph" wiki:df-graph + update_wiki_page "Query API" wiki:query-api if [ $CHANGED_ANY == "true" ]; then git config --local user.email "action@github.com" diff --git a/package.json b/package.json index 25257e5337..d90d88eda6 100644 --- a/package.json +++ b/package.json @@ -25,8 +25,8 @@ "summarizer": "ts-node src/cli/summarizer-app.ts", "export-quads": "ts-node src/cli/export-quads-app.ts", "capabilities-markdown": "ts-node src/documentation/print-capabilities-markdown.ts", - "df-graph-wiki-markdown": "ts-node src/documentation/print-dataflow-graph-wiki.ts", - "query-wiki-markdown": "ts-node src/documentation/print-query-wiki.ts", + "wiki:df-graph": "ts-node src/documentation/print-dataflow-graph-wiki.ts", + "wiki:query-api": "ts-node src/documentation/print-query-wiki.ts", "build": "tsc --project .", "build:bundle-flowr": "npm run build && esbuild --bundle dist/src/cli/flowr.js --platform=node --bundle --minify --target=node18 
--outfile=dist/src/cli/flowr.min.js", "lint-local": "npx eslint --version && npx eslint src/ test/ --rule \"no-warning-comments: off\"", diff --git a/src/documentation/doc-util/doc-auto-gen.ts b/src/documentation/doc-util/doc-auto-gen.ts new file mode 100644 index 0000000000..b49cfcd53b --- /dev/null +++ b/src/documentation/doc-util/doc-auto-gen.ts @@ -0,0 +1,13 @@ +import {flowrVersion} from "../../util/version"; + +export interface AutoGenHeaderArguments { + readonly rVersion?: string; + readonly currentDateAndTime?: string; + readonly filename: string; + readonly purpose: string; +} +export function autoGenHeader( + { rVersion, filename, purpose, currentDateAndTime = new Date().toISOString().replace('T', ', ').replace(/\.\d+Z$/, ' UTC') }: AutoGenHeaderArguments +) { + return `_This document was generated automatically from '${filename}' on ${currentDateAndTime} presenting an overview of flowR's ${purpose} (version: ${flowrVersion().format()}${ rVersion ? ', using R version ' + rVersion : ''})._` +} diff --git a/src/documentation/print-dataflow-graph-wiki.ts b/src/documentation/print-dataflow-graph-wiki.ts index 4acad263f0..eab806630c 100644 --- a/src/documentation/print-dataflow-graph-wiki.ts +++ b/src/documentation/print-dataflow-graph-wiki.ts @@ -11,6 +11,7 @@ import { setMinLevelOfAllLogs } from '../../test/functionality/_helper/log'; import { LogLevel } from '../util/log'; import { printDfGraphForCode, verifyExpectedSubgraph } from './doc-util/doc-dfg'; import { getFilePathMd } from './doc-util/doc-files'; +import {autoGenHeader} from "./doc-util/doc-auto-gen"; export interface SubExplanationParameters { readonly name: string, @@ -280,8 +281,7 @@ async function getEdgesExplanations(shell: RShell): Promise { async function getText(shell: RShell) { const rversion = (await shell.usedRVersion())?.format() ?? 
'unknown'; - const currentDateAndTime = new Date().toISOString().replace('T', ', ').replace(/\.\d+Z$/, ' UTC'); - return `_This document was generated automatically from '${module.filename}' on ${currentDateAndTime} presenting an overview of flowR's dataflow graph (version: ${flowrVersion().format()}, samples are generated with R version ${rversion})._ + return `${autoGenHeader({ filename: module.filename, purpose: 'dataflow graph', rVersion: rversion })} This page briefly summarizes flowR's dataflow graph, represented by ${DataflowGraph.name} in ${getFilePathMd('../dataflow/graph/graph.ts')}. In case you want to manually build such a graph (e.g., for testing), you can use the builder in ${getFilePathMd('../dataflow/graph/dataflowgraph-builder.ts')}. diff --git a/src/documentation/print-query-wiki.ts b/src/documentation/print-query-wiki.ts index a0b4e94573..693d92ce4f 100644 --- a/src/documentation/print-query-wiki.ts +++ b/src/documentation/print-query-wiki.ts @@ -12,6 +12,7 @@ import { QueriesSchema } from '../queries/query-schema'; import { markdownFormatter } from '../util/ansi'; import { executeCallContextQueries } from '../queries/call-context-query/call-context-query-executor'; import { executeCompoundQueries } from '../queries/virtual-query/compound-query'; +import {autoGenHeader} from "./doc-util/doc-auto-gen"; const fileCode = ` library(ggplot) @@ -139,8 +140,7 @@ Now, the results no longer contain calls to \`plot\` that are not defined locall async function getText(shell: RShell) { const rversion = (await shell.usedRVersion())?.format() ?? 
'unknown'; - const currentDateAndTime = new Date().toISOString().replace('T', ', ').replace(/\.\d+Z$/, ' UTC'); - return `_This document was generated automatically from '${module.filename}' on ${currentDateAndTime} presenting an overview of flowR's dataflow graph (version: ${flowrVersion().format()}, samples are generated with R version ${rversion})._ + return `${autoGenHeader({ filename: module.filename, purpose: 'query API', rVersion: rversion })} This page briefly summarizes flowR's query API, represented by the ${executeQueries.name} function in ${getFilePathMd('../queries/query.ts')}. Please see the [Interface](${FlowrWikiBaseRef}/Interface) wiki page for more information on how to access this API (TODO TODO TODO). diff --git a/wiki/Query API.md b/wiki/Query API.md index bf0e7da76a..571b62cfb1 100644 --- a/wiki/Query API.md +++ b/wiki/Query API.md @@ -1,4 +1,4 @@ -_This document was generated automatically from '/home/limerent/GitHub/phd/flowr/src/documentation/print-query-wiki.ts' on 2024-09-22, 19:43:07 UTC presenting an overview of flowR's dataflow graph (version: 2.0.25, samples are generated with R version 4.4.1)._ +_This document was generated automatically from '/home/happy-feet/git/phd/flowr-field/flowr/src/documentation/print-query-wiki.ts' on 2024-09-23, 07:20:48 UTC presenting an overview of flowR's query API (version: 2.0.25, using R version 4.4.1)._ This page briefly summarizes flowR's query API, represented by the executeQueries function in [`./src/queries/query.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/queries/query.ts). Please see the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to access this API (TODO TODO TODO). @@ -300,7 +300,7 @@ flowchart LR 89 -->|"reads, returns, argument"| 87 ``` -(The analysis required _20.28 ms_ (including parsing and normalization) within the generation environment.) 
+(The analysis required _21.84 ms_ (including parsing and normalization) within the generation environment.) ------------------------------------------ @@ -342,11 +342,11 @@ _Results (prettified and summarized):_ Query: **call-context** (0ms)\    ╰ **input**\      ╰ **csv-file**: _`read_csv`_ (L.6), _`read_csv`_ (L.7)\ -_All queries together required ≈1ms (1ms accuracy, total 7ms)_ +_All queries together required ≈1ms (1ms accuracy, total 11ms)_
Show Detailed Results as Json -The analysis required _7.28 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _10.93 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. @@ -541,11 +541,11 @@ Query: **call-context** (3ms)\    ╰ **visualize**\      ╰ **text**: _`mean`_ (L.9), _`mean`_ (L.19)\      ╰ **plot**: _`points`_ (L.17) with 1 link (_`plot`_ (L.16))\ -_All queries together required ≈3ms (1ms accuracy, total 12ms)_ +_All queries together required ≈3ms (1ms accuracy, total 18ms)_
Show Detailed Results as Json -The analysis required _11.60 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _17.94 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. @@ -667,11 +667,11 @@ _Results (prettified and summarized):_ Query: **call-context** (0ms)\    ╰ **visualize**\      ╰ **text**: _`mean`_ (L.9), _`print`_ (L.10), _`mean`_ (L.19), _`print`_ (L.19)\ -_All queries together required ≈0ms (1ms accuracy, total 6ms)_ +_All queries together required ≈0ms (1ms accuracy, total 8ms)_
Show Detailed Results as Json -The analysis required _5.56 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _7.83 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. @@ -736,14 +736,14 @@ Of course, in this specific scenario, the following query would be equivalent: _Results (prettified and summarized):_ -Query: **call-context** (0ms)\ +Query: **call-context** (1ms)\    ╰ **visualize**\      ╰ **text**: _`mean`_ (L.9), _`print`_ (L.10), _`mean`_ (L.19), _`print`_ (L.19)\ -_All queries together required ≈0ms (1ms accuracy, total 5ms)_ +_All queries together required ≈1ms (1ms accuracy, total 13ms)_
Show Detailed Results as Json -The analysis required _5.23 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _12.66 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. @@ -752,7 +752,7 @@ Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Int { "call-context": { ".meta": { - "timing": 0 + "timing": 1 }, "kinds": { "visualize": { @@ -776,7 +776,7 @@ Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Int } }, ".meta": { - "timing": 0 + "timing": 1 } } ``` @@ -826,11 +826,11 @@ _Results (prettified and summarized):_ Query: **call-context** (0ms)\    ╰ **visualize**\      ╰ **text**: _`mean`_ (L.9) with 1 call (_built-in_), _`mean`_ (L.19) with 1 call (_built-in_)\ -_All queries together required ≈0ms (1ms accuracy, total 5ms)_ +_All queries together required ≈0ms (1ms accuracy, total 10ms)_
Show Detailed Results as Json -The analysis required _5.09 ms_ (including parsing and normalization and the query) within the generation environment. +The analysis required _9.75 ms_ (including parsing and normalization and the query) within the generation environment. In general, the JSON contains the Ids of the nodes in question as they are present in the normalized AST or the dataflow graph of flowR. Please consult the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more information on how to get those. From f78073298d5bb0f18625f1bcb15946cd3526faf4 Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Mon, 23 Sep 2024 09:40:08 +0200 Subject: [PATCH 26/41] doc(query): interface wiki entry --- src/documentation/print-query-wiki.ts | 2 +- wiki/Interface.md | 113 +++++++++++++++++++++++++- 2 files changed, 113 insertions(+), 2 deletions(-) diff --git a/src/documentation/print-query-wiki.ts b/src/documentation/print-query-wiki.ts index 693d92ce4f..74e6792585 100644 --- a/src/documentation/print-query-wiki.ts +++ b/src/documentation/print-query-wiki.ts @@ -143,7 +143,7 @@ async function getText(shell: RShell) { return `${autoGenHeader({ filename: module.filename, purpose: 'query API', rVersion: rversion })} This page briefly summarizes flowR's query API, represented by the ${executeQueries.name} function in ${getFilePathMd('../queries/query.ts')}. -Please see the [Interface](${FlowrWikiBaseRef}/Interface) wiki page for more information on how to access this API (TODO TODO TODO). +Please see the [Interface](${FlowrWikiBaseRef}/Interface) wiki page for more information on how to access this API. 
First, consider that you have a file like the following (of course, this is just a simple and artificial example): diff --git a/wiki/Interface.md b/wiki/Interface.md index 33f9463bb5..d98f1f4c70 100644 --- a/wiki/Interface.md +++ b/wiki/Interface.md @@ -12,6 +12,7 @@ Although far from being as detailed as the in-depth explanation of [_flowR_](htt - [The Slice Request](#the-slice-request) - [Magic Comments](#magic-comments) - [The REPL Request](#the-repl-request) + - [The Query Request](#the-query-request) - [The Lineage Request](#the-lineage-request) - [💻 Using the REPL](#-using-the-repl) - [Example: Retrieving the Dataflow Graph](#example-retrieving-the-dataflow-graph) @@ -2721,7 +2722,7 @@ You only have to pass the command you want to execute in the `expression` field. We strongly recommend you to make use of the `id` field to link answers with requests as you can theoretically request the execution of multiple scripts at the same time, which then happens in parallel. > [!WARNING] -> There is currently no automatic sandboxing or safeguarding against such requests. They simply execute the respective R code on your machine. Please be very careful (and do not use `--r-session-access`). +> There is currently no automatic sandboxing or safeguarding against such requests. They simply execute the respective R code on your machine. Please be very careful (and do not use `--r-session-access` if you are unsure). The answer on such a request is different from the other messages as the `request-repl-execution` message may be sent multiple times. This allows to better handle requests that require more time but already output intermediate results. You can detect the end of the execution by receiving the `end-repl-execution` message. @@ -2772,6 +2773,116 @@ The `stream` field (either `stdout` or `stderr`) informs you of the output's ori
+### The Query Request + +
+Sequence Diagram + +```mermaid +sequenceDiagram + autonumber + participant Client + participant Server + + Client->>+Server: request-query + + alt + Server-->>Client: response-query + else + Server-->>Client: error + end + deactivate Server +``` + +
+ +In order to send queries, you have to send an [analysis request](#the-analysis-request) first. The `filetoken` you assign is of use here as you can re-use it to repeatedly query the same file. +This message provides direct access to _flowR_'s Query API. Please consult the [Query API documentation](https://github.com/flowr-analysis/flowr/wiki/Query%20API.md) for more information. + +
+Example Request + +_Note:_ even though we pretty-print these requests, they have to be sent as a single line, which ends with a newline. + +This request is the logical succession of the file analysis example above which uses the `filetoken`: `"x"`. + +```json +{ + "type": "request-query", + "id": "2", + "filetoken": "x", + "query": [ + { + "type": "compound", + "query": "call-context", + "commonArguments": { + "kind": "visualize", + "subkind": "text", + "callTargets": "global" + }, + "arguments": [ + { + "callName": "^mean$" + }, + { + "callName": "^print$", + "callTargets": "local" + } + ] + } +] +} +``` + +
+ +
+Example Response + +_Note:_ even though we pretty-print these responses, they are sent as a single line, ending with a newline. + +```json +{ + "type": "response-query", + "id": "2", + "results": { + "call-context": { + ".meta": { + "timing": 0 + }, + "kinds": { + "visualize": { + "subkinds": { + "text": [ + { + "id": 31, + "calls": [ + "built-in" + ] + }, + { + "id": 87, + "calls": [ + "built-in" + ] + } + ] + } + } + } + }, + ".meta": { + "timing": 0 + } + } +} +``` + +If an error occurred, the server will set the response's `type` to `"error"` and provide a message in the `reason` field. + +
+ + ### The Lineage Request
From 7618f23c909f8d90dd6dd1711102db937a375328 Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Mon, 23 Sep 2024 09:44:55 +0200 Subject: [PATCH 27/41] lint-fix: handle linter errors --- src/documentation/doc-util/doc-auto-gen.ts | 12 ++++++------ src/documentation/print-dataflow-graph-wiki.ts | 3 +-- src/documentation/print-query-wiki.ts | 3 +-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/documentation/doc-util/doc-auto-gen.ts b/src/documentation/doc-util/doc-auto-gen.ts index b49cfcd53b..4381fb1a7a 100644 --- a/src/documentation/doc-util/doc-auto-gen.ts +++ b/src/documentation/doc-util/doc-auto-gen.ts @@ -1,13 +1,13 @@ -import {flowrVersion} from "../../util/version"; +import { flowrVersion } from '../../util/version'; export interface AutoGenHeaderArguments { - readonly rVersion?: string; + readonly rVersion?: string; readonly currentDateAndTime?: string; - readonly filename: string; - readonly purpose: string; + readonly filename: string; + readonly purpose: string; } export function autoGenHeader( - { rVersion, filename, purpose, currentDateAndTime = new Date().toISOString().replace('T', ', ').replace(/\.\d+Z$/, ' UTC') }: AutoGenHeaderArguments + { rVersion, filename, purpose, currentDateAndTime = new Date().toISOString().replace('T', ', ').replace(/\.\d+Z$/, ' UTC') }: AutoGenHeaderArguments ) { - return `_This document was generated automatically from '${filename}' on ${currentDateAndTime} presenting an overview of flowR's ${purpose} (version: ${flowrVersion().format()}${ rVersion ? ', using R version ' + rVersion : ''})._` + return `_This document was generated automatically from '${filename}' on ${currentDateAndTime} presenting an overview of flowR's ${purpose} (version: ${flowrVersion().format()}${ rVersion ? 
', using R version ' + rVersion : ''})._`; } diff --git a/src/documentation/print-dataflow-graph-wiki.ts b/src/documentation/print-dataflow-graph-wiki.ts index eab806630c..17fb0676e0 100644 --- a/src/documentation/print-dataflow-graph-wiki.ts +++ b/src/documentation/print-dataflow-graph-wiki.ts @@ -1,6 +1,5 @@ import { DataflowGraph } from '../dataflow/graph/graph'; import type { MermaidMarkdownMark } from '../util/mermaid/dfg'; -import { flowrVersion } from '../util/version'; import { RShell } from '../r-bridge/shell'; import { VertexType } from '../dataflow/graph/vertex'; import { EdgeType } from '../dataflow/graph/edge'; @@ -11,7 +10,7 @@ import { setMinLevelOfAllLogs } from '../../test/functionality/_helper/log'; import { LogLevel } from '../util/log'; import { printDfGraphForCode, verifyExpectedSubgraph } from './doc-util/doc-dfg'; import { getFilePathMd } from './doc-util/doc-files'; -import {autoGenHeader} from "./doc-util/doc-auto-gen"; +import { autoGenHeader } from './doc-util/doc-auto-gen'; export interface SubExplanationParameters { readonly name: string, diff --git a/src/documentation/print-query-wiki.ts b/src/documentation/print-query-wiki.ts index 74e6792585..8230dba7e8 100644 --- a/src/documentation/print-query-wiki.ts +++ b/src/documentation/print-query-wiki.ts @@ -1,5 +1,4 @@ import { RShell } from '../r-bridge/shell'; -import { flowrVersion } from '../util/version'; import { printDfGraphForCode } from './doc-util/doc-dfg'; import { setMinLevelOfAllLogs } from '../../test/functionality/_helper/log'; import { LogLevel } from '../util/log'; @@ -12,7 +11,7 @@ import { QueriesSchema } from '../queries/query-schema'; import { markdownFormatter } from '../util/ansi'; import { executeCallContextQueries } from '../queries/call-context-query/call-context-query-executor'; import { executeCompoundQueries } from '../queries/virtual-query/compound-query'; -import {autoGenHeader} from "./doc-util/doc-auto-gen"; +import { autoGenHeader } from 
'./doc-util/doc-auto-gen'; const fileCode = ` library(ggplot) From bbe25bde4712a3ffb178afdeb02e32dd27e01171 Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Mon, 23 Sep 2024 09:53:50 +0200 Subject: [PATCH 28/41] refactor: remove outsourced todo markers --- src/dataflow/environments/resolve-by-name.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/dataflow/environments/resolve-by-name.ts b/src/dataflow/environments/resolve-by-name.ts index 75a741230e..ba72e07865 100644 --- a/src/dataflow/environments/resolve-by-name.ts +++ b/src/dataflow/environments/resolve-by-name.ts @@ -5,7 +5,6 @@ import type { Identifier, IdentifierDefinition } from './identifier'; import { happensInEveryBranch } from '../info'; -// TODO: cache this! => promote environments to classes /** * Resolves a given identifier name to a list of its possible definition location using R scoping and resolving rules. * @@ -20,7 +19,6 @@ export function resolveByName(name: Identifier, environment: REnvironmentInforma do{ const definition = current.memory.get(name); if(definition !== undefined) { - /* TODO: guard for other control dependencies which are set? 
*/ if(definition.every(d => happensInEveryBranch(d.controlDependencies))) { return definition; } else { From 69b14a14eda2ce125737aca34f161973a10e4e2b Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Mon, 23 Sep 2024 10:18:24 +0200 Subject: [PATCH 29/41] Apply code scanning fix for incomplete string escaping or encoding Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- src/util/ansi.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/util/ansi.ts b/src/util/ansi.ts index 71d4e124d7..183a3bf8a3 100644 --- a/src/util/ansi.ts +++ b/src/util/ansi.ts @@ -68,6 +68,7 @@ export const markdownFormatter: OutputFormatter = new class implements OutputFor } } + input = input.replaceAll(/\\/g, '\\\\'); return input.replaceAll(/\n/g, '\\\n').replaceAll(/ /g, ' '); } From cf4bcad5439b6df28a5b1a2c961cc5bcc2aca762 Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Mon, 23 Sep 2024 10:49:58 +0200 Subject: [PATCH 30/41] doc(dfg): control dependencies --- src/documentation/doc-util/doc-dfg.ts | 4 +- .../print-dataflow-graph-wiki.ts | 20 ++ src/util/mermaid/dfg.ts | 2 +- src/util/text.ts | 17 ++ wiki/Dataflow Graph.md | 223 ++++++++++-------- 5 files changed, 165 insertions(+), 101 deletions(-) create mode 100644 src/util/text.ts diff --git a/src/documentation/doc-util/doc-dfg.ts b/src/documentation/doc-util/doc-dfg.ts index 0d3af00dea..965cc00911 100644 --- a/src/documentation/doc-util/doc-dfg.ts +++ b/src/documentation/doc-util/doc-dfg.ts @@ -28,7 +28,7 @@ export interface PrintDataflowGraphOptions { readonly mark?: ReadonlySet; readonly showCode?: boolean; } -export async function printDfGraphForCode(shell: RShell, code: string, { mark, showCode }: PrintDataflowGraphOptions = {}) { +export async function printDfGraphForCode(shell: RShell, code: string, { mark, showCode = true }: PrintDataflowGraphOptions = {}) { const now = performance.now(); const result = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { 
shell, @@ -65,7 +65,7 @@ ${graphToMermaid({
- ` : '\n(' + metaInfo + ')\n\n') + +` : '\n(' + metaInfo + ')\n\n') + '-'.repeat(42) ; } diff --git a/src/documentation/print-dataflow-graph-wiki.ts b/src/documentation/print-dataflow-graph-wiki.ts index 17fb0676e0..141ba1816d 100644 --- a/src/documentation/print-dataflow-graph-wiki.ts +++ b/src/documentation/print-dataflow-graph-wiki.ts @@ -11,6 +11,7 @@ import { LogLevel } from '../util/log'; import { printDfGraphForCode, verifyExpectedSubgraph } from './doc-util/doc-dfg'; import { getFilePathMd } from './doc-util/doc-files'; import { autoGenHeader } from './doc-util/doc-auto-gen'; +import {nth} from "../util/text"; export interface SubExplanationParameters { readonly name: string, @@ -292,6 +293,8 @@ ${await printDfGraphForCode(shell,'x <- 3\ny <- x + 1\ny')} The above dataflow graph showcases the general gist. We define a dataflow graph as a directed graph G = (V, E), differentiating between ${getAllVertices().length} types of vertices V and ${getAllEdges().length} types of edges E allowing each vertex to have a single, and each edge to have multiple distinct types. +Additionally, every node may have links to its [control dependencies](#control-dependencies) (which you may view as a ${nth(getAllEdges().length + 1)} edge type although they are explicitly no data dependency). +
Vertex Types @@ -328,6 +331,23 @@ ${await getVertexExplanations(shell)} ## Edges ${await getEdgesExplanations(shell)} + +## Control Dependencies + +Each vertex may have a list of active control dependencies. +They hold the \`id\` of all nodes that effect if the current vertex is part of the execution or not, +and a boolean flag \`when\` to indicate if the control dependency is active when the condition is \`true\` or \`false\`. + +As an example, consider the following dataflow graph: + +${await printDfGraphForCode(shell,'if(p) a else b')} + +Whenever we visualize a graph, we represent the control dependencies as grayed out edges with a \`CD\` prefix, followed +by the \`when\` flag. +In the above example, both \`a\` and \`b\` depend on the \`if\`. Please note that they are _not_ linked to the result of +the condition itself as this is the more general linkage point (and harmonizes with other control structures, especially those which are user-defined). + + `; } diff --git a/src/util/mermaid/dfg.ts b/src/util/mermaid/dfg.ts index c3d01d2418..295a99530a 100644 --- a/src/util/mermaid/dfg.ts +++ b/src/util/mermaid/dfg.ts @@ -188,7 +188,7 @@ function vertexToMermaid(info: DataflowGraphVertexInfo, mermaid: MermaidGraph, i // who invented this syntax?! mermaid.edgeLines.push(` linkStyle ${mermaid.presentEdges.size - 1} stroke:red,color:red,stroke-width:4px;`); } - if(edgeTypes.has('CD-True')) { + if(edgeTypes.has('CD-True') || edgeTypes.has('CD-False')) { mermaid.edgeLines.push(` linkStyle ${mermaid.presentEdges.size - 1} stroke:gray,color:gray;`); } } diff --git a/src/util/text.ts b/src/util/text.ts new file mode 100644 index 0000000000..828943e623 --- /dev/null +++ b/src/util/text.ts @@ -0,0 +1,17 @@ +import {guard} from "./assert"; + +/** Formats `n` (intended for integers >= 1) as an English ordinal: `1st`, `2nd`, `3rd`, `4th`, `21st`, ... + * Values whose last two digits are 11, 12, or 13 always take `th` (`11th`, `112th`); violating the guard throws. */ +export function nth(n: number): string { + guard(isFinite(n) && n >= 1, 'n must be a finite number >= 1'); + const lastTwoDigits = n % 100; + if(lastTwoDigits >= 11 && lastTwoDigits <= 13) { + return `${n}th`; + } + switch(n % 10) {
+ case 1: return `${n}st`; + case 2: return `${n}nd`; + case 3: return `${n}rd`; + default: return `${n}th`; + } +} diff --git a/wiki/Dataflow Graph.md b/wiki/Dataflow Graph.md index 61472c3aaf..21abf0bfdf 100644 --- a/wiki/Dataflow Graph.md +++ b/wiki/Dataflow Graph.md @@ -1,4 +1,4 @@ -_This document was generated automatically from '/home/runner/work/flowr/flowr/src/documentation/print-dataflow-graph-wiki.ts' on 2024-09-20, 06:46:35 UTC presenting an overview of flowR's dataflow graph (version: 2.0.25, samples generated with R version 4.4.0)._ +_This document was generated automatically from '/home/happy-feet/git/phd/flowr-field/flowr/src/documentation/print-dataflow-graph-wiki.ts' on 2024-09-23, 08:49:23 UTC presenting an overview of flowR's dataflow graph (version: 2.0.25, using R version 4.4.1)._ This page briefly summarizes flowR's dataflow graph, represented by DataflowGraph in [`./src/dataflow/graph/graph.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/dataflow/graph/graph.ts). In case you want to manually build such a graph (e.g., for testing), you can use the builder in [`./src/dataflow/graph/dataflowgraph-builder.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/dataflow/graph/dataflowgraph-builder.ts). @@ -58,8 +58,7 @@ flowchart LR R Code of the Dataflow Graph - -The analysis required _13.19 ms_ (including parsing and normalization) within the generation environment. +The analysis required _11.63 ms_ (including parsing and normalization) within the generation environment. ```r @@ -124,12 +123,12 @@ flowchart LR ------------------------------------------ - - The above dataflow graph showcases the general gist. We define a dataflow graph as a directed graph G = (V, E), differentiating between 5 types of vertices V and 9 types of edges E allowing each vertex to have a single, and each edge to have multiple distinct types. 
+Additionally, every node may have links to its [control dependencies](#control-dependencies) (which you may view as a 10th edge type). +
Vertex Types @@ -192,8 +191,7 @@ flowchart LR R Code of the Dataflow Graph - -The analysis required _1.73 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.56 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0. ```r @@ -218,8 +216,6 @@ flowchart LR ------------------------------------------ - - Describes a constant value (numbers, logicals, strings, ...) @@ -247,8 +243,7 @@ flowchart LR R Code of the Dataflow Graph - -The analysis required _1.28 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.42 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0. ```r @@ -273,8 +268,6 @@ flowchart LR ------------------------------------------ - - Describes symbol/variable references @@ -302,8 +295,7 @@ flowchart LR R Code of the Dataflow Graph - -The analysis required _1.47 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.43 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 1. ```r @@ -328,8 +320,6 @@ flowchart LR ------------------------------------------ - - Describes any kind of function call, these can happen implicitly as well! (see the notable cases) @@ -364,8 +354,7 @@ flowchart LR R Code of the Dataflow Graph - -The analysis required _1.50 ms_ (including parsing and normalization) within the generation environment. +The analysis required _2.32 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 3. 
```r @@ -398,8 +387,6 @@ flowchart LR ------------------------------------------ - - Control structures like `if` are desugared into function calls (we omit the arguments of `if`(TRUE, 1) for simplicity). @@ -438,8 +425,7 @@ flowchart LR R Code of the Dataflow Graph - -The analysis required _1.32 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.12 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0. ```r @@ -474,8 +460,6 @@ flowchart LR ------------------------------------------ - - Describes a defined variable. Not just `<-` causes this! @@ -512,8 +496,7 @@ flowchart LR R Code of the Dataflow Graph - -The analysis required _1.17 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.70 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0. ```r @@ -548,8 +531,6 @@ flowchart LR ------------------------------------------ - - Are described similar within the dataflow graph, only the active environment differs. @@ -586,8 +567,7 @@ end R Code of the Dataflow Graph - -The analysis required _1.22 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.02 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 2. ```r @@ -620,8 +600,6 @@ end ------------------------------------------ - - Describes a function definition. Are always anonymous at first; although they can be bound to a name, the id `0` refers to the `1` in the body. The presented subgraph refers to the body of the function, marking exit points and open references. 
@@ -671,8 +649,7 @@ flowchart LR R Code of the Dataflow Graph - -The analysis required _1.40 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.96 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 4->0. ```r @@ -717,8 +694,6 @@ flowchart LR ------------------------------------------ - - The source vertex is usually a `use` that reads from the respective target definition. @@ -752,7 +727,7 @@ end (0, 4)`"]] %% Environment of 7 [level: 0]: %% Built-in - %% 170---------------------------------------- + %% 121---------------------------------------- %% foo: {foo (0, function, def. @5)} 7[["`#91;RFunctionCall#93; foo (7) @@ -773,8 +748,7 @@ end R Code of the Dataflow Graph - -The analysis required _1.68 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.84 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 7->0. ```r @@ -806,7 +780,7 @@ end (0, 4)`"]] %% Environment of 7 [level: 0]: %% Built-in - %% 170---------------------------------------- + %% 121---------------------------------------- %% foo: {foo (0, function, def. @5)} 7[["`#91;RFunctionCall#93; foo (7) @@ -828,8 +802,6 @@ end ------------------------------------------ - - Named calls are resolved too, linking to the symbol that holds the anonymous function definition (indirectly or directly) #### Reads Edge (Parameter) @@ -879,8 +851,7 @@ end R Code of the Dataflow Graph - -The analysis required _1.48 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.45 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 4->1. 
```r @@ -934,8 +905,6 @@ end ------------------------------------------ - - Parameters can read from each other as well. @@ -975,8 +944,7 @@ flowchart LR R Code of the Dataflow Graph - -The analysis required _1.09 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.13 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0->1, 0->2. ```r @@ -1011,8 +979,6 @@ flowchart LR ------------------------------------------ - - The source vertex is usually a `define variable` that is defined by the respective target use. However, nested definitions can carry it (in the nested case, `x` is defined by the return value of `<-`(y, z)). Additionally, we link the assignment. @@ -1062,8 +1028,7 @@ flowchart LR R Code of the Dataflow Graph - -The analysis required _1.20 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.26 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0->4, 0->3, 1->3. ```r @@ -1109,8 +1074,6 @@ flowchart LR ------------------------------------------ - - Nested definitions can carry the `defined by` edge as well. #### DefinedBy Edge (Expression) @@ -1150,8 +1113,7 @@ flowchart LR R Code of the Dataflow Graph - -The analysis required _1.17 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.30 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0->3. ```r @@ -1195,8 +1157,6 @@ flowchart LR ------------------------------------------ - - Here, we define by the result of the `+` expression. 
@@ -1232,7 +1192,7 @@ end (0, 4)`"]] %% Environment of 7 [level: 0]: %% Built-in - %% 343---------------------------------------- + %% 242---------------------------------------- %% foo: {foo (0, function, def. @5)} 7[["`#91;RFunctionCall#93; foo (7) @@ -1253,8 +1213,7 @@ end R Code of the Dataflow Graph - -The analysis required _1.46 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.41 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 7->4. ```r @@ -1286,7 +1245,7 @@ end (0, 4)`"]] %% Environment of 7 [level: 0]: %% Built-in - %% 343---------------------------------------- + %% 242---------------------------------------- %% foo: {foo (0, function, def. @5)} 7[["`#91;RFunctionCall#93; foo (7) @@ -1308,8 +1267,6 @@ end ------------------------------------------ - - Link the function call to the (anonymous) function definition. @@ -1345,7 +1302,7 @@ end (0, 3)`"]] %% Environment of 6 [level: 0]: %% Built-in - %% 388---------------------------------------- + %% 275---------------------------------------- %% foo: {foo (0, function, def. @4)} 6[["`#91;RFunctionCall#93; foo (6) @@ -1366,8 +1323,7 @@ end R Code of the Dataflow Graph - -The analysis required _1.47 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.38 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 6->1. ```r @@ -1400,7 +1356,7 @@ end (0, 3)`"]] %% Environment of 6 [level: 0]: %% Built-in - %% 388---------------------------------------- + %% 275---------------------------------------- %% foo: {foo (0, function, def. 
@4)} 6[["`#91;RFunctionCall#93; foo (6) @@ -1422,8 +1378,6 @@ end ------------------------------------------ - - Link the function call to the exit points of the target definition (this may incorporate the call-context). @@ -1467,7 +1421,7 @@ end *2.3*`"]) %% Environment of 12 [level: 0]: %% Built-in - %% 454---------------------------------------- + %% 318---------------------------------------- %% f: {f (0, function, def. @7)} 12[["`#91;RFunctionCall#93; f (12) @@ -1494,8 +1448,7 @@ end R Code of the Dataflow Graph - -The analysis required _1.60 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.53 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 11->1, 1->11. ```r @@ -1536,7 +1489,7 @@ end *2.3*`"]) %% Environment of 12 [level: 0]: %% Built-in - %% 454---------------------------------------- + %% 318---------------------------------------- %% f: {f (0, function, def. @7)} 12[["`#91;RFunctionCall#93; f (12) @@ -1563,8 +1516,6 @@ end ------------------------------------------ - - **This edge is automatically joined with defined by on call!** Link an Argument to whichever parameter they cause to be defined if the related function call is invoked. @@ -1610,7 +1561,7 @@ end *2.3*`"]) %% Environment of 12 [level: 0]: %% Built-in - %% 520---------------------------------------- + %% 363---------------------------------------- %% f: {f (0, function, def. @7)} 12[["`#91;RFunctionCall#93; f (12) @@ -1637,8 +1588,7 @@ end R Code of the Dataflow Graph - -The analysis required _1.58 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.43 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 11->1, 1->11. 
```r @@ -1679,7 +1629,7 @@ end *2.3*`"]) %% Environment of 12 [level: 0]: %% Built-in - %% 520---------------------------------------- + %% 363---------------------------------------- %% f: {f (0, function, def. @7)} 12[["`#91;RFunctionCall#93; f (12) @@ -1706,8 +1656,6 @@ end ------------------------------------------ - - **This edge is automatically joined with defines on call!** This represents the other direction of `defines on call` (i.e., links the parameter to the argument). This is just for completeness. @@ -1746,8 +1694,7 @@ flowchart LR R Code of the Dataflow Graph - -The analysis required _1.14 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.50 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 5->1, 5->3. ```r @@ -1780,8 +1727,6 @@ flowchart LR ------------------------------------------ - - Links a function call to the entry point of its arguments. If we do not know the target of such a call, we automatically assume that all arguments are read by the call as well! @@ -1800,7 +1745,7 @@ Type: `128` flowchart LR %% Environment of 7 [level: 0]: %% Built-in - %% 623---------------------------------------- + %% 437---------------------------------------- %% x: {x (3, variable, def. @5)} 7["`#91;RFunctionDefinition#93; function (7) @@ -1831,7 +1776,7 @@ end (0, 7)`"]] %% Environment of 10 [level: 0]: %% Built-in - %% 631---------------------------------------- + %% 443---------------------------------------- %% f: {f (0, function, def. @8)} 10[["`#91;RFunctionCall#93; f (10) @@ -1858,8 +1803,7 @@ end R Code of the Dataflow Graph - -The analysis required _1.83 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.32 ms_ (including parsing and normalization) within the generation environment. 
The following marks are used in the graph to highlight sub-parts (uses ids): 3->10. ```r @@ -1875,7 +1819,7 @@ f() flowchart LR %% Environment of 7 [level: 0]: %% Built-in - %% 623---------------------------------------- + %% 437---------------------------------------- %% x: {x (3, variable, def. @5)} 7["`#91;RFunctionDefinition#93; function (7) @@ -1906,7 +1850,7 @@ end (0, 7)`"]] %% Environment of 10 [level: 0]: %% Built-in - %% 631---------------------------------------- + %% 443---------------------------------------- %% f: {f (0, function, def. @8)} 10[["`#91;RFunctionCall#93; f (10) @@ -1934,8 +1878,6 @@ end ------------------------------------------ - - Links a global side effect to an affected function call (e.g., a super definition within the function body) @@ -1967,8 +1909,7 @@ flowchart LR R Code of the Dataflow Graph - -The analysis required _1.21 ms_ (including parsing and normalization) within the generation environment. +The analysis required _0.89 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 3->1. ```r @@ -1997,10 +1938,96 @@ flowchart LR ------------------------------------------ +Marks cases in which R's non-standard evaluation mechanisms cause the default semantics to deviate + + -Marks cases in which R's non-standard evaluation mechanisms cause the default semantics to deviate +## Control Dependencies +Each vertex may have a list of active control dependencies. +They hold the `id` of all nodes that effect if the current vertex is part of the execution or not, +and a boolean flag `when` to indicate if the control dependency is active when the condition is `true` or `false`. 
+As an example, consider the following dataflow graph: + + + +------------------------------------------ + +```mermaid +flowchart LR + 0(["`#91;RSymbol#93; p + (0) + *1.4*`"]) + 1(["`#91;RSymbol#93; a + (1, :may:5+) + *1.7*`"]) + 3(["`#91;RSymbol#93; b + (3, :may:5-) + *1.14*`"]) + 5[["`#91;RIfThenElse#93; if + (5) + *1.1-14* + (0, 1, 3)`"]] + 1 -->|"CD-True"| 5 + linkStyle 0 stroke:gray,color:gray; + 3 -->|"CD-False"| 5 + linkStyle 1 stroke:gray,color:gray; + 5 -->|"returns, argument"| 1 + 5 -->|"returns, argument"| 3 + 5 -->|"reads, argument"| 0 +``` +
+ +R Code of the Dataflow Graph + +The analysis required _1.29 ms_ (including parsing and normalization) within the generation environment. + + +```r +if(p) a else b +``` + +
+ +Mermaid Code (without markings) + +``` +flowchart LR + 0(["`#91;RSymbol#93; p + (0) + *1.4*`"]) + 1(["`#91;RSymbol#93; a + (1, :may:5+) + *1.7*`"]) + 3(["`#91;RSymbol#93; b + (3, :may:5-) + *1.14*`"]) + 5[["`#91;RIfThenElse#93; if + (5) + *1.1-14* + (0, 1, 3)`"]] + 1 -->|"CD-True"| 5 + linkStyle 0 stroke:gray,color:gray; + 3 -->|"CD-False"| 5 + linkStyle 1 stroke:gray,color:gray; + 5 -->|"returns, argument"| 1 + 5 -->|"returns, argument"| 3 + 5 -->|"reads, argument"| 0 +``` + +
+ +
+ +------------------------------------------ + +Whenever we visualize a graph, we represent the control dependencies as grayed out edges with a `CD` prefix, followed +by the `when` flag. +In the above example, both `a` and `b` depend on the `if`. Please note that they are _not_ linked to the result of +the condition itself as this is the more general linkage point (and harmonizes with other control structures, especially those which are user-defined). + + From 59e7168d96999f30e2280e06b4ce61bb21433797 Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Mon, 23 Sep 2024 10:52:03 +0200 Subject: [PATCH 31/41] lint-fix: handle linter issues --- .../print-dataflow-graph-wiki.ts | 2 +- src/util/text.ts | 28 +++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/documentation/print-dataflow-graph-wiki.ts b/src/documentation/print-dataflow-graph-wiki.ts index 141ba1816d..99040eb2e9 100644 --- a/src/documentation/print-dataflow-graph-wiki.ts +++ b/src/documentation/print-dataflow-graph-wiki.ts @@ -11,7 +11,7 @@ import { LogLevel } from '../util/log'; import { printDfGraphForCode, verifyExpectedSubgraph } from './doc-util/doc-dfg'; import { getFilePathMd } from './doc-util/doc-files'; import { autoGenHeader } from './doc-util/doc-auto-gen'; -import {nth} from "../util/text"; +import { nth } from '../util/text'; export interface SubExplanationParameters { readonly name: string, diff --git a/src/util/text.ts b/src/util/text.ts index 828943e623..dcfb34e8c7 100644 --- a/src/util/text.ts +++ b/src/util/text.ts @@ -1,17 +1,17 @@ -import {guard} from "./assert"; +import { guard } from './assert'; export function nth(n: number): string { - guard(isFinite(n) && n >= 1, 'n must be a non-negative number'); - const num = String(n) - const lastDigit = num[num.length - 1]; - switch (lastDigit) { - case '1': - return n > 0 && n < 20 ? `${n}th` : `${n}st`; - case '2': - return n > 0 && n < 20 ? `${n}th` : `${n}nd`; - case '3': - return n > 0 && n < 20 ? 
`${n}th` : `${n}rd`; - default: - return `${n}th`; - } + guard(isFinite(n) && n >= 1, 'n must be a non-negative number'); + const num = String(n); + const lastDigit = num[num.length - 1]; + switch(lastDigit) { + case '1': + return n > 0 && n < 20 ? `${n}th` : `${n}st`; + case '2': + return n > 0 && n < 20 ? `${n}th` : `${n}nd`; + case '3': + return n > 0 && n < 20 ? `${n}th` : `${n}rd`; + default: + return `${n}th`; + } } From bb0df24ed906d1ca72a78212241af54a32311398 Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Mon, 23 Sep 2024 11:30:25 +0200 Subject: [PATCH 32/41] wip(dfg): document dataflow information --- src/documentation/doc-util/doc-dfg.ts | 11 +- src/documentation/doc-util/doc-query.ts | 8 +- .../print-dataflow-graph-wiki.ts | 55 +++- wiki/Dataflow Graph.md | 274 +++++++++++------- 4 files changed, 224 insertions(+), 124 deletions(-) diff --git a/src/documentation/doc-util/doc-dfg.ts b/src/documentation/doc-util/doc-dfg.ts index 965cc00911..4e97fabfa2 100644 --- a/src/documentation/doc-util/doc-dfg.ts +++ b/src/documentation/doc-util/doc-dfg.ts @@ -12,7 +12,7 @@ import { diffOfDataflowGraphs } from '../../dataflow/graph/diff'; import { guard } from '../../util/assert'; import { printAsMs } from './doc-ms'; -function printDfGraph(graph: DataflowGraph, mark?: ReadonlySet) { +export function printDfGraph(graph: DataflowGraph, mark?: ReadonlySet) { return ` \`\`\`mermaid ${graphToMermaid({ @@ -38,10 +38,10 @@ export async function printDfGraphForCode(shell: RShell, code: string, { mark, s const metaInfo = `The analysis required _${printAsMs(duration)}_ (including parsing and normalization) within the generation environment.`; - return '\n\n' + '-'.repeat(42) + '\n' + printDfGraph(result.dataflow.graph, mark) + (showCode ? ` + return '\n\n' + printDfGraph(result.dataflow.graph, mark) + (showCode ? `
-R Code of the Dataflow Graph +R Code of the Dataflow Graph ${metaInfo} ${mark ? `The following marks are used in the graph to highlight sub-parts (uses ids): ${[...mark].join(', ')}.` : ''} @@ -52,7 +52,7 @@ ${code}
-Mermaid Code (without markings) +Mermaid Code ${(mark?.size ?? 0) > 0 ? '(without markings)' : ''} \`\`\` ${graphToMermaid({ @@ -65,8 +65,7 @@ ${graphToMermaid({
-` : '\n(' + metaInfo + ')\n\n') + - '-'.repeat(42) +` : '\n(' + metaInfo + ')\n\n') ; } diff --git a/src/documentation/doc-util/doc-query.ts b/src/documentation/doc-util/doc-query.ts index 4f86b4bab6..386f9a704a 100644 --- a/src/documentation/doc-util/doc-query.ts +++ b/src/documentation/doc-util/doc-query.ts @@ -41,7 +41,7 @@ The analysis required _${printAsMs(duration)}_ (including parsing and normalizat ${JSON.stringify(queries, jsonReplacer, 2)} \`\`\` -${collapseResult ? '
Show Results' : ''} +${collapseResult ? '
Show Results' : ''} _Results (prettified and summarized):_ @@ -49,7 +49,7 @@ ${ asciiSummaryOfQueryResult(markdownFormatter, duration, results as QueryResults<'call-context'>, analysis) } -
Show Detailed Results as Json +
Show Detailed Results as Json ${metaInfo} @@ -62,7 +62,7 @@ ${resultAsString} ${ showCode ? ` -
Original Code +
Original Code \`\`\`r ${code} @@ -123,7 +123,7 @@ ${await buildExplanation(shell)}
-Implementation Details +Implementation Details Responsible for the execution of the ${name} query is \`${functionName}\` in ${getFilePathMd(functionFile)}. diff --git a/src/documentation/print-dataflow-graph-wiki.ts b/src/documentation/print-dataflow-graph-wiki.ts index 99040eb2e9..79d4262110 100644 --- a/src/documentation/print-dataflow-graph-wiki.ts +++ b/src/documentation/print-dataflow-graph-wiki.ts @@ -8,10 +8,14 @@ import { guard } from '../util/assert'; import { defaultEnv } from '../../test/functionality/_helper/dataflow/environment-builder'; import { setMinLevelOfAllLogs } from '../../test/functionality/_helper/log'; import { LogLevel } from '../util/log'; -import { printDfGraphForCode, verifyExpectedSubgraph } from './doc-util/doc-dfg'; -import { getFilePathMd } from './doc-util/doc-files'; +import {printDfGraph, printDfGraphForCode, verifyExpectedSubgraph} from './doc-util/doc-dfg'; +import {FlowrWikiBaseRef, getFilePathMd} from './doc-util/doc-files'; import { autoGenHeader } from './doc-util/doc-auto-gen'; import { nth } from '../util/text'; +import {PipelineExecutor} from "../core/pipeline-executor"; +import {DEFAULT_DATAFLOW_PIPELINE} from "../core/steps/pipeline/default-pipelines"; +import {requestFromInput} from "../r-bridge/retriever"; +import {PipelineOutput} from "../core/steps/pipeline/pipeline"; export interface SubExplanationParameters { readonly name: string, @@ -278,6 +282,15 @@ async function getEdgesExplanations(shell: RShell): Promise { return results.join('\n'); } +async function dummyDataflow(): Promise> { + const shell = new RShell() + const result = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { + shell, + request: requestFromInput('x <- 1\nx + 1') + }).allRemainingSteps(); + shell.close(); + return result; +} async function getText(shell: RShell) { const rversion = (await shell.usedRVersion())?.format() ?? 
'unknown'; @@ -287,6 +300,9 @@ This page briefly summarizes flowR's dataflow graph, represented by ${DataflowGr In case you want to manually build such a graph (e.g., for testing), you can use the builder in ${getFilePathMd('../dataflow/graph/dataflowgraph-builder.ts')}. This wiki page focuses on explaining what such a dataflow graph looks like! +Please be aware that the accompanied [dataflow information](#dataflow-information) contains things besides the graph, +like the entry and exit points of the subgraphs, and currently active references (see [below](#dataflow-information)). + ${await printDfGraphForCode(shell,'x <- 3\ny <- x + 1\ny')} @@ -347,6 +363,41 @@ by the \`when\` flag. In the above example, both \`a\` and \`b\` depend on the \`if\`. Please note that they are _not_ linked to the result of the condition itself as this is the more general linkage point (and harmonizes with other control structures, especially those which are user-defined). +## Dataflow Information + +Using _flowR's_ code interface (see the [Interface](${FlowrWikiBaseRef}/Interface) wiki page for more), you can generate the dataflow information +for a given piece of R code: + +\`\`\`ts +const shell = new RShell() +const result = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { + shell, + request: requestFromInput('x <- 1\\nx + 1') +}).allRemainingSteps(); +shell.close(); +\`\`\` + +
+ +Transpiled Code + +The actual code we are using in case the example above gets outdated: + +\`\`\`ts +${dummyDataflow.toString()} +\`\`\` + +
+ + +Now, you can find the dataflow _information_ with \`result.dataflow\`. More specifically, the graph is stored in \`result.dataflow.graph\` and looks like this: + +${ +await (async () => { + const result = await dummyDataflow(); + return printDfGraph(result.dataflow.graph) +})() +} `; } diff --git a/wiki/Dataflow Graph.md b/wiki/Dataflow Graph.md index 21abf0bfdf..7091a87b97 100644 --- a/wiki/Dataflow Graph.md +++ b/wiki/Dataflow Graph.md @@ -1,12 +1,14 @@ -_This document was generated automatically from '/home/happy-feet/git/phd/flowr-field/flowr/src/documentation/print-dataflow-graph-wiki.ts' on 2024-09-23, 08:49:23 UTC presenting an overview of flowR's dataflow graph (version: 2.0.25, using R version 4.4.1)._ +_This document was generated automatically from '/home/happy-feet/git/phd/flowr-field/flowr/src/documentation/print-dataflow-graph-wiki.ts' on 2024-09-23, 09:29:19 UTC presenting an overview of flowR's dataflow graph (version: 2.0.25, using R version 4.4.1)._ This page briefly summarizes flowR's dataflow graph, represented by DataflowGraph in [`./src/dataflow/graph/graph.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/dataflow/graph/graph.ts). In case you want to manually build such a graph (e.g., for testing), you can use the builder in [`./src/dataflow/graph/dataflowgraph-builder.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/dataflow/graph/dataflowgraph-builder.ts). This wiki page focuses on explaining what such a dataflow graph looks like! +Please be aware that the accompanied [dataflow information](#dataflow-information) contains things besides the graph, +like the entry and exit points of the subgraphs, and currently active references (see [below](#dataflow-information)). + ------------------------------------------- ```mermaid flowchart LR @@ -56,9 +58,9 @@ flowchart LR
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _11.63 ms_ (including parsing and normalization) within the generation environment. +The analysis required _11.80 ms_ (including parsing and normalization) within the generation environment. ```r @@ -69,7 +71,7 @@ y
-Mermaid Code (without markings) +Mermaid Code ``` flowchart LR @@ -121,13 +123,13 @@ flowchart LR
------------------------------------------- + The above dataflow graph showcases the general gist. We define a dataflow graph as a directed graph G = (V, E), differentiating between 5 types of vertices V and 9 types of edges E allowing each vertex to have a single, and each edge to have multiple distinct types. -Additionally, every node may have links to its [control dependencies](#control-dependencies) (which you may view as a 10th edge type). +Additionally, every node may have links to its [control dependencies](#control-dependencies) (which you may view as a 10th edge type although they are explicitly no data dependency).
@@ -176,7 +178,6 @@ Type: `value` ------------------------------------------- ```mermaid flowchart LR @@ -189,9 +190,9 @@ flowchart LR
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.56 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.13 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0. ```r @@ -200,7 +201,7 @@ The following marks are used in the graph to highlight sub-parts (uses ids): 0.
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -214,7 +215,7 @@ flowchart LR
------------------------------------------- + Describes a constant value (numbers, logicals, strings, ...) @@ -228,7 +229,6 @@ Type: `use` ------------------------------------------- ```mermaid flowchart LR @@ -241,9 +241,9 @@ flowchart LR
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.42 ms_ (including parsing and normalization) within the generation environment. +The analysis required _0.94 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0. ```r @@ -252,7 +252,7 @@ x
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -266,7 +266,7 @@ flowchart LR
------------------------------------------- + Describes symbol/variable references @@ -280,7 +280,6 @@ Type: `function-call` ------------------------------------------- ```mermaid flowchart LR @@ -293,9 +292,9 @@ flowchart LR
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.43 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.05 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 1. ```r @@ -304,7 +303,7 @@ foo()
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -318,7 +317,7 @@ flowchart LR
------------------------------------------- + Describes any kind of function call, these can happen implicitly as well! (see the notable cases) @@ -331,7 +330,6 @@ Describes any kind of function call, these can happen implicitly as well! (see t ------------------------------------------- ```mermaid flowchart LR @@ -352,9 +350,9 @@ flowchart LR
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _2.32 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.72 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 3. ```r @@ -363,7 +361,7 @@ if(TRUE) 1
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -385,7 +383,7 @@ flowchart LR
------------------------------------------- + Control structures like `if` are desugared into function calls (we omit the arguments of `if`(TRUE, 1) for simplicity). @@ -400,7 +398,6 @@ Type: `variable-definition` ------------------------------------------- ```mermaid flowchart LR @@ -423,9 +420,9 @@ flowchart LR
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.12 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.13 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0. ```r @@ -434,7 +431,7 @@ x <- 1
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -458,7 +455,7 @@ flowchart LR
------------------------------------------- + Describes a defined variable. Not just `<-` causes this! @@ -471,7 +468,6 @@ Describes a defined variable. Not just `<-` causes this! ------------------------------------------- ```mermaid flowchart LR @@ -494,9 +490,9 @@ flowchart LR
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.70 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.06 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0. ```r @@ -505,7 +501,7 @@ x <<- 1
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -529,7 +525,7 @@ flowchart LR
------------------------------------------- + Are described similar within the dataflow graph, only the active environment differs. @@ -544,7 +540,6 @@ Type: `function-definition` ------------------------------------------- ```mermaid flowchart LR @@ -565,9 +560,9 @@ end
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.02 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.60 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 2. ```r @@ -576,7 +571,7 @@ function() 1
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -598,7 +593,7 @@ end
------------------------------------------- + Describes a function definition. Are always anonymous at first; although they can be bound to a name, the id `0` refers to the `1` in the body. The presented subgraph refers to the body of the function, marking exit points and open references. @@ -615,7 +610,6 @@ Type: `1` ------------------------------------------- ```mermaid flowchart LR @@ -647,9 +641,9 @@ flowchart LR
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.96 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.14 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 4->0. ```r @@ -659,7 +653,7 @@ print(x)
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -692,7 +686,7 @@ flowchart LR
------------------------------------------- + The source vertex is usually a `use` that reads from the respective target definition. @@ -705,7 +699,6 @@ The source vertex is usually a `use` that reads from the respective target defin ------------------------------------------- ```mermaid flowchart LR @@ -746,9 +739,9 @@ end
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.84 ms_ (including parsing and normalization) within the generation environment. +The analysis required _3.63 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 7->0. ```r @@ -758,7 +751,7 @@ foo()
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -800,14 +793,13 @@ end
------------------------------------------- + Named calls are resolved too, linking to the symbol that holds the anonymous function definition (indirectly or directly) #### Reads Edge (Parameter) ------------------------------------------- ```mermaid flowchart LR @@ -849,9 +841,9 @@ end
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.45 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.64 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 4->1. ```r @@ -860,7 +852,7 @@ f <- function(x, y=x) {}
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -903,7 +895,7 @@ end
------------------------------------------- + Parameters can read from each other as well. @@ -918,7 +910,6 @@ Type: `2` ------------------------------------------- ```mermaid flowchart LR @@ -942,9 +933,9 @@ flowchart LR
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.13 ms_ (including parsing and normalization) within the generation environment. +The analysis required _0.76 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0->1, 0->2. ```r @@ -953,7 +944,7 @@ x <- y
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -977,7 +968,7 @@ flowchart LR
------------------------------------------- + The source vertex is usually a `define variable` that is defined by the respective target use. However, nested definitions can carry it (in the nested case, `x` is defined by the return value of `<-`(y, z)). Additionally, we link the assignment. @@ -990,7 +981,6 @@ The source vertex is usually a `define variable` that is defined by the respecti ------------------------------------------- ```mermaid flowchart LR @@ -1026,9 +1016,9 @@ flowchart LR
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.26 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.16 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0->4, 0->3, 1->3. ```r @@ -1037,7 +1027,7 @@ x <- y <- z
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -1072,14 +1062,13 @@ flowchart LR
------------------------------------------- + Nested definitions can carry the `defined by` edge as well. #### DefinedBy Edge (Expression) ------------------------------------------- ```mermaid flowchart LR @@ -1111,9 +1100,9 @@ flowchart LR
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.30 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.49 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0->3. ```r @@ -1122,7 +1111,7 @@ x <- y + z
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -1155,7 +1144,7 @@ flowchart LR
------------------------------------------- + Here, we define by the result of the `+` expression. @@ -1170,7 +1159,6 @@ Type: `4` ------------------------------------------- ```mermaid flowchart LR @@ -1211,9 +1199,9 @@ end
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.41 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.15 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 7->4. ```r @@ -1223,7 +1211,7 @@ foo()
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -1265,7 +1253,7 @@ end
------------------------------------------- + Link the function call to the (anonymous) function definition. @@ -1279,7 +1267,6 @@ Type: `8` ------------------------------------------- ```mermaid flowchart LR @@ -1321,9 +1308,9 @@ end
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.38 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.55 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 6->1. ```r @@ -1333,7 +1320,7 @@ foo()
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -1376,7 +1363,7 @@ end
------------------------------------------- + Link the function call to the exit points of the target definition (this may incorporate the call-context). @@ -1390,7 +1377,6 @@ Type: `16` ------------------------------------------- ```mermaid flowchart LR @@ -1446,9 +1432,9 @@ end
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.53 ms_ (including parsing and normalization) within the generation environment. +The analysis required _2.98 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 11->1, 1->11. ```r @@ -1458,7 +1444,7 @@ f(x=1)
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -1514,7 +1500,7 @@ end
------------------------------------------- + **This edge is automatically joined with defined by on call!** @@ -1530,7 +1516,6 @@ Type: `32` ------------------------------------------- ```mermaid flowchart LR @@ -1586,9 +1571,9 @@ end
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.43 ms_ (including parsing and normalization) within the generation environment. +The analysis required _2.28 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 11->1, 1->11. ```r @@ -1598,7 +1583,7 @@ f(x=1)
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -1654,7 +1639,7 @@ end
------------------------------------------- + **This edge is automatically joined with defines on call!** @@ -1670,7 +1655,6 @@ Type: `64` ------------------------------------------- ```mermaid flowchart LR @@ -1692,9 +1676,9 @@ flowchart LR
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.50 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.00 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 5->1, 5->3. ```r @@ -1703,7 +1687,7 @@ f(x,y)
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -1725,7 +1709,7 @@ flowchart LR
------------------------------------------- + Links a function call to the entry point of its arguments. If we do not know the target of such a call, we automatically assume that all arguments are read by the call as well! @@ -1739,7 +1723,6 @@ Type: `128` ------------------------------------------- ```mermaid flowchart LR @@ -1801,9 +1784,9 @@ end
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.32 ms_ (including parsing and normalization) within the generation environment. +The analysis required _2.42 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 3->10. ```r @@ -1813,7 +1796,7 @@ f()
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -1876,7 +1859,7 @@ end
------------------------------------------- + Links a global side effect to an affected function call (e.g., a super definition within the function body) @@ -1890,7 +1873,6 @@ Type: `256` ------------------------------------------- ```mermaid flowchart LR @@ -1907,9 +1889,9 @@ flowchart LR
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _0.89 ms_ (including parsing and normalization) within the generation environment. +The analysis required _0.91 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 3->1. ```r @@ -1918,7 +1900,7 @@ quote(x)
-Mermaid Code (without markings) +Mermaid Code (without markings) ``` flowchart LR @@ -1936,7 +1918,7 @@ flowchart LR
------------------------------------------- + Marks cases in which R's non-standard evaluation mechanisms cause the default semantics to deviate @@ -1953,7 +1935,6 @@ As an example, consider the following dataflow graph: ------------------------------------------- ```mermaid flowchart LR @@ -1981,9 +1962,9 @@ flowchart LR
-R Code of the Dataflow Graph +R Code of the Dataflow Graph -The analysis required _1.29 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.32 ms_ (including parsing and normalization) within the generation environment. ```r @@ -1992,7 +1973,7 @@ if(p) a else b
-Mermaid Code (without markings) +Mermaid Code ``` flowchart LR @@ -2022,12 +2003,81 @@ flowchart LR
------------------------------------------- + Whenever we visualize a graph, we represent the control dependencies as grayed out edges with a `CD` prefix, followed by the `when` flag. In the above example, both `a` and `b` depend on the `if`. Please note that they are _not_ linked to the result of the condition itself as this is the more general linkage point (and harmonizes with other control structures, especially those which are user-defined). +## Dataflow Information + +Using _flowR's_ code interface (see the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more), you can generate the dataflow information +for a given piece of R code: + +```ts +const shell = new RShell() +const result = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { + shell, + request: requestFromInput('x <- 1\nx + 1') +}).allRemainingSteps(); +shell.close(); +``` + +
+ +Transpiled Code + +The actual code we are using in case the example above gets outdated: + +```ts +async function dummyDataflow() { + const shell = new shell_1.RShell(); + const result = await new pipeline_executor_1.PipelineExecutor(default_pipelines_1.DEFAULT_DATAFLOW_PIPELINE, { + shell, + request: (0, retriever_1.requestFromInput)('x <- 1\nx + 1') + }).allRemainingSteps(); + shell.close(); + return result; +} +``` + +
+ + +Now, you can find the dataflow _information_ with `result.dataflow`. More specifically, the graph is stored in `result.dataflow.graph` and looks like this: + + +```mermaid +flowchart LR + 1{{"`#91;RNumber#93; 1 + (1) + *1.6*`"}} + 0["`#91;RSymbol#93; x + (0) + *1.1*`"] + 2[["`#91;RBinaryOp#93; #60;#45; + (2) + *1.1-6* + (0, 1)`"]] + 3(["`#91;RSymbol#93; x + (3) + *2.1*`"]) + 4{{"`#91;RNumber#93; 1 + (4) + *2.5*`"}} + 5[["`#91;RBinaryOp#93; #43; + (5) + *2.1-5* + (3, 4)`"]] + 0 -->|"defined-by"| 1 + 0 -->|"defined-by"| 2 + 2 -->|"argument"| 1 + 2 -->|"returns, argument"| 0 + 3 -->|"reads"| 0 + 5 -->|"reads, argument"| 3 + 5 -->|"reads, argument"| 4 +``` + From 13b7c1ccb23a3e5c9a2b246da7345d89a89de142 Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Mon, 23 Sep 2024 13:35:30 +0200 Subject: [PATCH 33/41] doc(dfg): unknown side effects --- src/documentation/doc-util/doc-dfg.ts | 5 +- src/documentation/doc-util/doc-env.ts | 17 + .../print-dataflow-graph-wiki.ts | 70 +- src/util/mermaid/dfg.ts | 5 +- wiki/Dataflow Graph.md | 3501 ++++++++++++++++- 5 files changed, 3564 insertions(+), 34 deletions(-) create mode 100644 src/documentation/doc-util/doc-env.ts diff --git a/src/documentation/doc-util/doc-dfg.ts b/src/documentation/doc-util/doc-dfg.ts index 4e97fabfa2..35c1343616 100644 --- a/src/documentation/doc-util/doc-dfg.ts +++ b/src/documentation/doc-util/doc-dfg.ts @@ -11,6 +11,7 @@ import type { DataflowDifferenceReport } from '../../dataflow/graph/diff'; import { diffOfDataflowGraphs } from '../../dataflow/graph/diff'; import { guard } from '../../util/assert'; import { printAsMs } from './doc-ms'; +import {jsonReplacer} from "../../util/json"; export function printDfGraph(graph: DataflowGraph, mark?: ReadonlySet) { return ` @@ -43,8 +44,8 @@ export async function printDfGraphForCode(shell: RShell, code: string, { mark, s R Code of the Dataflow Graph -${metaInfo} -${mark ? 
`The following marks are used in the graph to highlight sub-parts (uses ids): ${[...mark].join(', ')}.` : ''} +${metaInfo} ${mark ? `The following marks are used in the graph to highlight sub-parts (uses ids): ${[...mark].join(', ')}.` : ''} +We encountered ${result.dataflow.graph.unknownSideEffects.size > 0 ? 'unknown side effects (with ids: ' + JSON.stringify(result.dataflow.graph.unknownSideEffects, jsonReplacer) + ')' : 'no unknown side effects'} during the analysis. \`\`\`r ${code} diff --git a/src/documentation/doc-util/doc-env.ts b/src/documentation/doc-util/doc-env.ts new file mode 100644 index 0000000000..55d2c89226 --- /dev/null +++ b/src/documentation/doc-util/doc-env.ts @@ -0,0 +1,17 @@ +import {BuiltInEnvironment, IEnvironment} from "../../dataflow/environments/environment"; +import {printIdentifier} from "../../util/mermaid/dfg"; + +export function printEnvironmentToMarkdown(env: IEnvironment | undefined): string { + if(env === undefined) { + return '?? (error)'; + } else if(env.id === BuiltInEnvironment.id) { + return `_Built-in Environment (${env.memory.size} entries)_`; + } + + const lines = ['| Name | Definitions |', '|------|-------------|']; + for(const [name, defs] of env.memory.entries()) { + const printName = `\`${name}\``; + lines.push(`| ${printName} | {${defs.map(printIdentifier).join(', ')}} |`); + } + return lines.join('\n') + `\n\n
Parent Environment\n\n` + printEnvironmentToMarkdown(env.parent) + '\n\n
'; +} diff --git a/src/documentation/print-dataflow-graph-wiki.ts b/src/documentation/print-dataflow-graph-wiki.ts index 79d4262110..c6da19e3e8 100644 --- a/src/documentation/print-dataflow-graph-wiki.ts +++ b/src/documentation/print-dataflow-graph-wiki.ts @@ -1,5 +1,5 @@ import { DataflowGraph } from '../dataflow/graph/graph'; -import type { MermaidMarkdownMark } from '../util/mermaid/dfg'; +import {MermaidMarkdownMark} from '../util/mermaid/dfg'; import { RShell } from '../r-bridge/shell'; import { VertexType } from '../dataflow/graph/vertex'; import { EdgeType } from '../dataflow/graph/edge'; @@ -16,6 +16,8 @@ import {PipelineExecutor} from "../core/pipeline-executor"; import {DEFAULT_DATAFLOW_PIPELINE} from "../core/steps/pipeline/default-pipelines"; import {requestFromInput} from "../r-bridge/retriever"; import {PipelineOutput} from "../core/steps/pipeline/pipeline"; +import {jsonReplacer} from "../util/json"; +import {printEnvironmentToMarkdown} from "./doc-util/doc-env"; export interface SubExplanationParameters { readonly name: string, @@ -300,8 +302,9 @@ This page briefly summarizes flowR's dataflow graph, represented by ${DataflowGr In case you want to manually build such a graph (e.g., for testing), you can use the builder in ${getFilePathMd('../dataflow/graph/dataflowgraph-builder.ts')}. This wiki page focuses on explaining what such a dataflow graph looks like! -Please be aware that the accompanied [dataflow information](#dataflow-information) contains things besides the graph, +Please be aware that the accompanied [dataflow information](#dataflow-information) returned by _flowR_ contains things besides the graph, like the entry and exit points of the subgraphs, and currently active references (see [below](#dataflow-information)). +Additionally, you may be interested in the set of [Unknown Side Effects](#unknown-side-effects) marking calls which _flowR_ is unable to handle correctly. 
${await printDfGraphForCode(shell,'x <- 3\ny <- x + 1\ny')} @@ -366,13 +369,13 @@ the condition itself as this is the more general linkage point (and harmonizes w ## Dataflow Information Using _flowR's_ code interface (see the [Interface](${FlowrWikiBaseRef}/Interface) wiki page for more), you can generate the dataflow information -for a given piece of R code: +for a given piece of R code (in this case \`x <- 1; x + 1\`) as follows: \`\`\`ts const shell = new RShell() const result = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { shell, - request: requestFromInput('x <- 1\\nx + 1') + request: requestFromInput('x <- 1; x + 1') }).allRemainingSteps(); shell.close(); \`\`\` @@ -395,7 +398,64 @@ Now, you can find the dataflow _information_ with \`result.dataflow\`. More spec ${ await (async () => { const result = await dummyDataflow(); - return printDfGraph(result.dataflow.graph) + const dfGraphString = printDfGraph(result.dataflow.graph) + + return ` +${dfGraphString} + +However, the dataflow information contains more, quite a lot of information in fact. + +
+ +Dataflow Information as Json + +\`\`\`json +${JSON.stringify(result.dataflow, jsonReplacer, 2)} +\`\`\` + +
+ +So let's start by looking at the properties of the dataflow information object: ${Object.keys(result.dataflow).map(k => `\`${k}\``).join(', ')}. + +${ (() => { guard(Object.keys(result.dataflow).length === 7, () => 'Update Dataflow Documentation!'); return ''; })() } + +There are three sets of references. +**in** (ids: ${JSON.stringify(new Set(result.dataflow.in.map(n => n.nodeId)), jsonReplacer)}) and **out** (ids: ${JSON.stringify(new Set(result.dataflow.out.map(n => n.nodeId)), jsonReplacer)}) contain the +ingoing and outgoing references of the subgraph at hand (in this case, the whole code, as we are at the end of the dataflow analysis). +Besides the Ids, they also contain important meta-information (e.g., what is to be read). +The third set, **unknownReferences**, contains all references that are not yet identified as read or written +(the example does not have any, but, for example, \`x\` (with id 0) would first be unknown and then later classified as a definition). + +The **environment** property contains the active environment information of the subgraph. +In other words, this is a linked list of tables (scopes), mapping identifiers to their respective definitions. +A summarized version of the produced environment looks like this: + +${ + printEnvironmentToMarkdown(result.dataflow.environment.current) +} + +This shows us that the local environment contains a single definition for \`x\` (with id 0) and that the parent environment is the built-in environment. +Additionally, we get the information that the node with the id 2 was responsible for the definition of \`x\`. + +Last but not least, the information contains the single **entry point** (${ + JSON.stringify(result.dataflow.entryPoint) + }) and a set of **exit points** (${ + JSON.stringify(result.dataflow.exitPoints.map(e => e.nodeId)) + }). +Besides marking potential exits, the exit points also provide information about why the exit occurs and which control dependencies affect the exit. 
+ +### Unknown Side Effects + +In case _flowR_ encounters a function call that it cannot handle, it marks the call as an unknown side effect. +You can find these as part of the dataflow graph, specifically as \`unknownSideEffects\` (with a leading underscore if serialized as Json). +In the following graph, _flowR_ realizes that it is unable to correctly handle the impacts of the \`load\` call and therefore marks it as such (marked in bright red): + +${await printDfGraphForCode(shell,'load("file")\nprint(x + y)')} + +In general, as we cannot handle these correctly, we leave it up to other analyses (and [queries](${FlowrWikiBaseRef}/Query API)) to handle these cases +as they see fit. + ` + })() } diff --git a/src/util/mermaid/dfg.ts b/src/util/mermaid/dfg.ts index 295a99530a..eebd972802 100644 --- a/src/util/mermaid/dfg.ts +++ b/src/util/mermaid/dfg.ts @@ -131,7 +131,7 @@ function mermaidNodeBrackets(tag: DataflowGraphVertexInfo['tag']): { open: strin return { open, close }; } -function printIdentifier(id: IdentifierDefinition): string { +export function printIdentifier(id: IdentifierDefinition): string { return `${id.name} (${id.nodeId}, ${id.kind},${id.controlDependencies? ' {' + id.controlDependencies.map(c => c.id + (c.when ? '+' : '-')).join(',') + '},' : ''} def. 
@${id.definedAt})`; } @@ -174,6 +174,9 @@ function vertexToMermaid(info: DataflowGraphVertexInfo, mermaid: MermaidGraph, i if(mark?.has(id)) { mermaid.nodeLines.push(` style ${idPrefix}${id} stroke:black,stroke-width:7px; `); } + if(mermaid.rootGraph.unknownSideEffects.has(id)) { + mermaid.nodeLines.push(` style ${idPrefix}${id} stroke:red,stroke-width:5px; `); + } const edges = mermaid.rootGraph.get(id, true); guard(edges !== undefined, `node ${id} must be found`); diff --git a/wiki/Dataflow Graph.md b/wiki/Dataflow Graph.md index 7091a87b97..192586a121 100644 --- a/wiki/Dataflow Graph.md +++ b/wiki/Dataflow Graph.md @@ -1,11 +1,12 @@ -_This document was generated automatically from '/home/happy-feet/git/phd/flowr-field/flowr/src/documentation/print-dataflow-graph-wiki.ts' on 2024-09-23, 09:29:19 UTC presenting an overview of flowR's dataflow graph (version: 2.0.25, using R version 4.4.1)._ +_This document was generated automatically from '/home/happy-feet/git/phd/flowr-field/flowr/src/documentation/print-dataflow-graph-wiki.ts' on 2024-09-23, 11:35:04 UTC presenting an overview of flowR's dataflow graph (version: 2.0.25, using R version 4.4.1)._ This page briefly summarizes flowR's dataflow graph, represented by DataflowGraph in [`./src/dataflow/graph/graph.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/dataflow/graph/graph.ts). In case you want to manually build such a graph (e.g., for testing), you can use the builder in [`./src/dataflow/graph/dataflowgraph-builder.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/dataflow/graph/dataflowgraph-builder.ts). This wiki page focuses on explaining what such a dataflow graph looks like! 
-Please be aware that the accompanied [dataflow information](#dataflow-information) contains things besides the graph, +Please be aware that the accompanied [dataflow information](#dataflow-information) returned by _flowR_ contains things besides the graph, like the entry and exit points of the subgraphs, and currently active references (see [below](#dataflow-information)). +Additionally, you may be interested in the set of [Unknown Side Effects](#unknown-side-effects) marking calls which _flowR_ is unable to handle correctly. @@ -60,8 +61,9 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _11.80 ms_ (including parsing and normalization) within the generation environment. +The analysis required _13.22 ms_ (including parsing and normalization) within the generation environment. +We encountered no unknown side effects during the analysis. ```r x <- 3 @@ -192,8 +194,9 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.13 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.35 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0. +We encountered no unknown side effects during the analysis. ```r 42 @@ -243,8 +246,9 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _0.94 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.19 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0. +We encountered no unknown side effects during the analysis. ```r x @@ -294,8 +298,9 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.05 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.17 ms_ (including parsing and normalization) within the generation environment. 
The following marks are used in the graph to highlight sub-parts (uses ids): 1. +We encountered no unknown side effects during the analysis. ```r foo() @@ -352,8 +357,9 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.72 ms_ (including parsing and normalization) within the generation environment. +The analysis required _2.94 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 3. +We encountered no unknown side effects during the analysis. ```r if(TRUE) 1 @@ -422,8 +428,9 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.13 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.44 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0. +We encountered no unknown side effects during the analysis. ```r x <- 1 @@ -492,8 +499,9 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.06 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.53 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0. +We encountered no unknown side effects during the analysis. ```r x <<- 1 @@ -562,8 +570,9 @@ end R Code of the Dataflow Graph -The analysis required _1.60 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.20 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 2. +We encountered no unknown side effects during the analysis. 
```r function() 1 @@ -643,8 +652,9 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.14 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.82 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 4->0. +We encountered no unknown side effects during the analysis. ```r x <- 2 @@ -741,8 +751,9 @@ end R Code of the Dataflow Graph -The analysis required _3.63 ms_ (including parsing and normalization) within the generation environment. +The analysis required _2.14 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 7->0. +We encountered no unknown side effects during the analysis. ```r foo <- function() {} @@ -843,8 +854,9 @@ end R Code of the Dataflow Graph -The analysis required _1.64 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.81 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 4->1. +We encountered no unknown side effects during the analysis. ```r f <- function(x, y=x) {} @@ -935,8 +947,9 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _0.76 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.21 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0->1, 0->2. +We encountered no unknown side effects during the analysis. ```r x <- y @@ -1018,8 +1031,9 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.16 ms_ (including parsing and normalization) within the generation environment. 
+The analysis required _1.77 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0->4, 0->3, 1->3. +We encountered no unknown side effects during the analysis. ```r x <- y <- z @@ -1102,8 +1116,9 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.49 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.71 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0->3. +We encountered no unknown side effects during the analysis. ```r x <- y + z @@ -1201,8 +1216,9 @@ end R Code of the Dataflow Graph -The analysis required _1.15 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.81 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 7->4. +We encountered no unknown side effects during the analysis. ```r foo <- function() {} @@ -1310,8 +1326,9 @@ end R Code of the Dataflow Graph -The analysis required _1.55 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.68 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 6->1. +We encountered no unknown side effects during the analysis. ```r foo <- function() x @@ -1434,8 +1451,9 @@ end R Code of the Dataflow Graph -The analysis required _2.98 ms_ (including parsing and normalization) within the generation environment. +The analysis required _3.21 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 11->1, 1->11. 
+We encountered no unknown side effects during the analysis. ```r f <- function(x) {} @@ -1573,8 +1591,9 @@ end R Code of the Dataflow Graph -The analysis required _2.28 ms_ (including parsing and normalization) within the generation environment. +The analysis required _2.24 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 11->1, 1->11. +We encountered no unknown side effects during the analysis. ```r f <- function(x) {} @@ -1678,8 +1697,9 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.00 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.64 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 5->1, 5->3. +We encountered no unknown side effects during the analysis. ```r f(x,y) @@ -1786,8 +1806,9 @@ end R Code of the Dataflow Graph -The analysis required _2.42 ms_ (including parsing and normalization) within the generation environment. +The analysis required _2.90 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 3->10. +We encountered no unknown side effects during the analysis. ```r f <- function() { x <<- 2 } @@ -1891,8 +1912,9 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _0.91 ms_ (including parsing and normalization) within the generation environment. +The analysis required _0.92 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 3->1. +We encountered no unknown side effects during the analysis. 
```r quote(x) @@ -1964,8 +1986,9 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.32 ms_ (including parsing and normalization) within the generation environment. +The analysis required _1.31 ms_ (including parsing and normalization) within the generation environment. +We encountered no unknown side effects during the analysis. ```r if(p) a else b @@ -2013,13 +2036,13 @@ the condition itself as this is the more general linkage point (and harmonizes w ## Dataflow Information Using _flowR's_ code interface (see the [Interface](https://github.com/flowr-analysis/flowr/wiki//Interface) wiki page for more), you can generate the dataflow information -for a given piece of R code: +for a given piece of R code (in this case `x <- 1; x + 1`) as follows: ```ts const shell = new RShell() const result = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { shell, - request: requestFromInput('x <- 1\nx + 1') + request: requestFromInput('x <- 1; x + 1') }).allRemainingSteps(); shell.close(); ``` @@ -2048,6 +2071,7 @@ async function dummyDataflow() { Now, you can find the dataflow _information_ with `result.dataflow`. More specifically, the graph is stored in `result.dataflow.graph` and looks like this: + ```mermaid flowchart LR 1{{"`#91;RNumber#93; 1 @@ -2080,4 +2104,3429 @@ flowchart LR ``` +However, the dataflow information contains more, quite a lot of information in fact. + +
+ +Dataflow Information as Json + +```json +{ + "unknownReferences": [], + "in": [ + { + "nodeId": 2, + "name": "<-" + }, + { + "nodeId": 5, + "name": "+", + "call": true + } + ], + "out": [ + { + "nodeId": 0, + "name": "x", + "kind": "variable", + "definedAt": 2 + }, + { + "nodeId": 0, + "name": "x", + "kind": "variable", + "definedAt": 2 + } + ], + "environment": { + "current": { + "id": 464, + "parent": { + "id": 0, + "memory": [ + [ + "NULL", + [ + { + "kind": "built-in-value", + "definedAt": "built-in", + "value": null, + "name": "NULL", + "nodeId": "built-in" + } + ] + ], + [ + "NA", + [ + { + "kind": "built-in-value", + "definedAt": "built-in", + "value": null, + "name": "NA", + "nodeId": "built-in" + } + ] + ], + [ + "TRUE", + [ + { + "kind": "built-in-value", + "definedAt": "built-in", + "value": true, + "name": "TRUE", + "nodeId": "built-in" + } + ] + ], + [ + "T", + [ + { + "kind": "built-in-value", + "definedAt": "built-in", + "value": true, + "name": "T", + "nodeId": "built-in" + } + ] + ], + [ + "FALSE", + [ + { + "kind": "built-in-value", + "definedAt": "built-in", + "value": false, + "name": "FALSE", + "nodeId": "built-in" + } + ] + ], + [ + "F", + [ + { + "kind": "built-in-value", + "definedAt": "built-in", + "value": false, + "name": "F", + "nodeId": "built-in" + } + ] + ], + [ + "~", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "~", + "nodeId": "built-in" + } + ] + ], + [ + "+", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "+", + "nodeId": "built-in" + } + ] + ], + [ + "-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "-", + "nodeId": "built-in" + } + ] + ], + [ + "*", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "*", + "nodeId": "built-in" + } + ] + ], + [ + "/", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "/", + "nodeId": "built-in" + } + ] + ], + [ + "^", + [ + { + "kind": 
"built-in-function", + "definedAt": "built-in", + "name": "^", + "nodeId": "built-in" + } + ] + ], + [ + "!", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "!", + "nodeId": "built-in" + } + ] + ], + [ + "?", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "?", + "nodeId": "built-in" + } + ] + ], + [ + "**", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "**", + "nodeId": "built-in" + } + ] + ], + [ + "==", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "==", + "nodeId": "built-in" + } + ] + ], + [ + "!=", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "!=", + "nodeId": "built-in" + } + ] + ], + [ + ">", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": ">", + "nodeId": "built-in" + } + ] + ], + [ + "<", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "<", + "nodeId": "built-in" + } + ] + ], + [ + ">=", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": ">=", + "nodeId": "built-in" + } + ] + ], + [ + "<=", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "<=", + "nodeId": "built-in" + } + ] + ], + [ + "%%", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "%%", + "nodeId": "built-in" + } + ] + ], + [ + "%/%", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "%/%", + "nodeId": "built-in" + } + ] + ], + [ + "%*%", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "%*%", + "nodeId": "built-in" + } + ] + ], + [ + "%in%", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "%in%", + "nodeId": "built-in" + } + ] + ], + [ + ":", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": ":", + "nodeId": "built-in" + } + ] + ], + [ + "list", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + 
"name": "list", + "nodeId": "built-in" + } + ] + ], + [ + "c", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "c", + "nodeId": "built-in" + } + ] + ], + [ + "rep", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "rep", + "nodeId": "built-in" + } + ] + ], + [ + "seq", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "seq", + "nodeId": "built-in" + } + ] + ], + [ + "seq_len", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "seq_len", + "nodeId": "built-in" + } + ] + ], + [ + "seq_along", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "seq_along", + "nodeId": "built-in" + } + ] + ], + [ + "seq.int", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "seq.int", + "nodeId": "built-in" + } + ] + ], + [ + "gsub", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "gsub", + "nodeId": "built-in" + } + ] + ], + [ + "which", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "which", + "nodeId": "built-in" + } + ] + ], + [ + "class", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "class", + "nodeId": "built-in" + } + ] + ], + [ + "dimnames", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "dimnames", + "nodeId": "built-in" + } + ] + ], + [ + "min", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "min", + "nodeId": "built-in" + } + ] + ], + [ + "max", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "max", + "nodeId": "built-in" + } + ] + ], + [ + "intersect", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "intersect", + "nodeId": "built-in" + } + ] + ], + [ + "subset", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "subset", + "nodeId": "built-in" + } + ] + ], + [ + "match", + [ + { + "kind": 
"built-in-function", + "definedAt": "built-in", + "name": "match", + "nodeId": "built-in" + } + ] + ], + [ + "sqrt", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "sqrt", + "nodeId": "built-in" + } + ] + ], + [ + "abs", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "abs", + "nodeId": "built-in" + } + ] + ], + [ + "round", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "round", + "nodeId": "built-in" + } + ] + ], + [ + "floor", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "floor", + "nodeId": "built-in" + } + ] + ], + [ + "ceiling", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "ceiling", + "nodeId": "built-in" + } + ] + ], + [ + "signif", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "signif", + "nodeId": "built-in" + } + ] + ], + [ + "trunc", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "trunc", + "nodeId": "built-in" + } + ] + ], + [ + "log", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "log", + "nodeId": "built-in" + } + ] + ], + [ + "log10", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "log10", + "nodeId": "built-in" + } + ] + ], + [ + "log2", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "log2", + "nodeId": "built-in" + } + ] + ], + [ + "sum", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "sum", + "nodeId": "built-in" + } + ] + ], + [ + "mean", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "mean", + "nodeId": "built-in" + } + ] + ], + [ + "unique", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "unique", + "nodeId": "built-in" + } + ] + ], + [ + "paste", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "paste", + "nodeId": "built-in" + } + ] + ], + 
[ + "paste0", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "paste0", + "nodeId": "built-in" + } + ] + ], + [ + "read.csv", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "read.csv", + "nodeId": "built-in" + } + ] + ], + [ + "stop", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "stop", + "nodeId": "built-in" + } + ] + ], + [ + "is.null", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "is.null", + "nodeId": "built-in" + } + ] + ], + [ + "plot", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "plot", + "nodeId": "built-in" + } + ] + ], + [ + "numeric", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "numeric", + "nodeId": "built-in" + } + ] + ], + [ + "as.character", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "as.character", + "nodeId": "built-in" + } + ] + ], + [ + "as.integer", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "as.integer", + "nodeId": "built-in" + } + ] + ], + [ + "as.logical", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "as.logical", + "nodeId": "built-in" + } + ] + ], + [ + "as.numeric", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "as.numeric", + "nodeId": "built-in" + } + ] + ], + [ + "as.matrix", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "as.matrix", + "nodeId": "built-in" + } + ] + ], + [ + "do.call", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "do.call", + "nodeId": "built-in" + } + ] + ], + [ + "rbind", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "rbind", + "nodeId": "built-in" + } + ] + ], + [ + "nrow", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "nrow", + "nodeId": "built-in" + } + ] + ], + [ + "ncol", + [ + { + "kind": 
"built-in-function", + "definedAt": "built-in", + "name": "ncol", + "nodeId": "built-in" + } + ] + ], + [ + "tryCatch", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "tryCatch", + "nodeId": "built-in" + } + ] + ], + [ + "expression", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "expression", + "nodeId": "built-in" + } + ] + ], + [ + "factor", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "factor", + "nodeId": "built-in" + } + ] + ], + [ + "missing", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "missing", + "nodeId": "built-in" + } + ] + ], + [ + "as.data.frame", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "as.data.frame", + "nodeId": "built-in" + } + ] + ], + [ + "data.frame", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "data.frame", + "nodeId": "built-in" + } + ] + ], + [ + "na.omit", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "na.omit", + "nodeId": "built-in" + } + ] + ], + [ + "rownames", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "rownames", + "nodeId": "built-in" + } + ] + ], + [ + "names", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "names", + "nodeId": "built-in" + } + ] + ], + [ + "order", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "order", + "nodeId": "built-in" + } + ] + ], + [ + "length", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "length", + "nodeId": "built-in" + } + ] + ], + [ + "any", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "any", + "nodeId": "built-in" + } + ] + ], + [ + "dim", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "dim", + "nodeId": "built-in" + } + ] + ], + [ + "matrix", + [ + { + "kind": "built-in-function", + "definedAt": 
"built-in", + "name": "matrix", + "nodeId": "built-in" + } + ] + ], + [ + "cbind", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "cbind", + "nodeId": "built-in" + } + ] + ], + [ + "nchar", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "nchar", + "nodeId": "built-in" + } + ] + ], + [ + "t", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "t", + "nodeId": "built-in" + } + ] + ], + [ + "options", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "options", + "nodeId": "built-in" + } + ] + ], + [ + "mapply", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "mapply", + "nodeId": "built-in" + } + ] + ], + [ + "Mapply", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "Mapply", + "nodeId": "built-in" + } + ] + ], + [ + "lapply", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "lapply", + "nodeId": "built-in" + } + ] + ], + [ + "sapply", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "sapply", + "nodeId": "built-in" + } + ] + ], + [ + "vapply", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "vapply", + "nodeId": "built-in" + } + ] + ], + [ + "Lapply", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "Lapply", + "nodeId": "built-in" + } + ] + ], + [ + "Sapply", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "Sapply", + "nodeId": "built-in" + } + ] + ], + [ + "Vapply", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "Vapply", + "nodeId": "built-in" + } + ] + ], + [ + "apply", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "apply", + "nodeId": "built-in" + } + ] + ], + [ + "tapply", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "tapply", + "nodeId": "built-in" + } + ] + ], + [ + 
"Tapply", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "Tapply", + "nodeId": "built-in" + } + ] + ], + [ + "print", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "print", + "nodeId": "built-in" + } + ] + ], + [ + "(", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "(", + "nodeId": "built-in" + } + ] + ], + [ + "load", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "load", + "nodeId": "built-in" + } + ] + ], + [ + "load_all", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "load_all", + "nodeId": "built-in" + } + ] + ], + [ + "setwd", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "setwd", + "nodeId": "built-in" + } + ] + ], + [ + "set.seed", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "set.seed", + "nodeId": "built-in" + } + ] + ], + [ + "eval", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "eval", + "nodeId": "built-in" + } + ] + ], + [ + "body", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "body", + "nodeId": "built-in" + } + ] + ], + [ + "formals", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "formals", + "nodeId": "built-in" + } + ] + ], + [ + "environment", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "environment", + "nodeId": "built-in" + } + ] + ], + [ + "cat", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "cat", + "nodeId": "built-in" + } + ] + ], + [ + "switch", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "switch", + "nodeId": "built-in" + } + ] + ], + [ + "return", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "return", + "nodeId": "built-in" + } + ] + ], + [ + "break", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + 
"name": "break", + "nodeId": "built-in" + } + ] + ], + [ + "next", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "next", + "nodeId": "built-in" + } + ] + ], + [ + "{", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "{", + "nodeId": "built-in" + } + ] + ], + [ + "source", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "source", + "nodeId": "built-in" + } + ] + ], + [ + "[", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "[", + "nodeId": "built-in" + } + ] + ], + [ + "[[", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "[[", + "nodeId": "built-in" + } + ] + ], + [ + "$", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "$", + "nodeId": "built-in" + } + ] + ], + [ + "@", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "@", + "nodeId": "built-in" + } + ] + ], + [ + "if", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "if", + "nodeId": "built-in" + } + ] + ], + [ + "ifelse", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "ifelse", + "nodeId": "built-in" + } + ] + ], + [ + "get", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "get", + "nodeId": "built-in" + } + ] + ], + [ + "library", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "library", + "nodeId": "built-in" + } + ] + ], + [ + "require", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "require", + "nodeId": "built-in" + } + ] + ], + [ + "<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "<-", + "nodeId": "built-in" + } + ] + ], + [ + "=", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "=", + "nodeId": "built-in" + } + ] + ], + [ + ":=", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": ":=", 
+ "nodeId": "built-in" + } + ] + ], + [ + "assign", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "assign", + "nodeId": "built-in" + } + ] + ], + [ + "delayedAssign", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "delayedAssign", + "nodeId": "built-in" + } + ] + ], + [ + "<<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "<<-", + "nodeId": "built-in" + } + ] + ], + [ + "->", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "->", + "nodeId": "built-in" + } + ] + ], + [ + "->>", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "->>", + "nodeId": "built-in" + } + ] + ], + [ + "&&", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "&&", + "nodeId": "built-in" + } + ] + ], + [ + "&", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "&", + "nodeId": "built-in" + } + ] + ], + [ + "||", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "||", + "nodeId": "built-in" + } + ] + ], + [ + "|", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "|", + "nodeId": "built-in" + } + ] + ], + [ + "|>", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "|>", + "nodeId": "built-in" + } + ] + ], + [ + "%>%", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "%>%", + "nodeId": "built-in" + } + ] + ], + [ + "function", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "function", + "nodeId": "built-in" + } + ] + ], + [ + "\\", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "\\", + "nodeId": "built-in" + } + ] + ], + [ + "quote", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "quote", + "nodeId": "built-in" + } + ] + ], + [ + "substitute", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + 
"name": "substitute", + "nodeId": "built-in" + } + ] + ], + [ + "bquote", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "bquote", + "nodeId": "built-in" + } + ] + ], + [ + "for", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "for", + "nodeId": "built-in" + } + ] + ], + [ + "repeat", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "repeat", + "nodeId": "built-in" + } + ] + ], + [ + "while", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "while", + "nodeId": "built-in" + } + ] + ], + [ + "on.exit", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "on.exit", + "nodeId": "built-in" + } + ] + ], + [ + "sys.on.exit", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "sys.on.exit", + "nodeId": "built-in" + } + ] + ], + [ + "par", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "par", + "nodeId": "built-in" + } + ] + ], + [ + "setnames", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "setnames", + "nodeId": "built-in" + } + ] + ], + [ + "setNames", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "setNames", + "nodeId": "built-in" + } + ] + ], + [ + "setkey", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "setkey", + "nodeId": "built-in" + } + ] + ], + [ + "setkeyv", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "setkeyv", + "nodeId": "built-in" + } + ] + ], + [ + "setindex", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "setindex", + "nodeId": "built-in" + } + ] + ], + [ + "setindexv", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "setindexv", + "nodeId": "built-in" + } + ] + ], + [ + "setattr", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "setattr", + "nodeId": "built-in" + 
} + ] + ], + [ + "sink", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "sink", + "nodeId": "built-in" + } + ] + ], + [ + "requireNamespace", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "requireNamespace", + "nodeId": "built-in" + } + ] + ], + [ + "loadNamespace", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "loadNamespace", + "nodeId": "built-in" + } + ] + ], + [ + "attachNamespace", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "attachNamespace", + "nodeId": "built-in" + } + ] + ], + [ + "asNamespace", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "asNamespace", + "nodeId": "built-in" + } + ] + ], + [ + "library.dynam", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "library.dynam", + "nodeId": "built-in" + } + ] + ], + [ + "install.packages", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "install.packages", + "nodeId": "built-in" + } + ] + ], + [ + "install", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "install", + "nodeId": "built-in" + } + ] + ], + [ + "install_github", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "install_github", + "nodeId": "built-in" + } + ] + ], + [ + "install_gitlab", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "install_gitlab", + "nodeId": "built-in" + } + ] + ], + [ + "install_bitbucket", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "install_bitbucket", + "nodeId": "built-in" + } + ] + ], + [ + "install_url", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "install_url", + "nodeId": "built-in" + } + ] + ], + [ + "install_git", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "install_git", + "nodeId": "built-in" + } + ] + ], + [ + "install_svn", + [ + 
{ + "kind": "built-in-function", + "definedAt": "built-in", + "name": "install_svn", + "nodeId": "built-in" + } + ] + ], + [ + "install_local", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "install_local", + "nodeId": "built-in" + } + ] + ], + [ + "install_version", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "install_version", + "nodeId": "built-in" + } + ] + ], + [ + "update_packages", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "update_packages", + "nodeId": "built-in" + } + ] + ], + [ + "attach", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "attach", + "nodeId": "built-in" + } + ] + ], + [ + "detach", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "detach", + "nodeId": "built-in" + } + ] + ], + [ + "unname", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "unname", + "nodeId": "built-in" + } + ] + ], + [ + "rm", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "rm", + "nodeId": "built-in" + } + ] + ], + [ + "remove", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "remove", + "nodeId": "built-in" + } + ] + ], + [ + "[<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "[<-", + "nodeId": "built-in" + } + ] + ], + [ + "[<<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "[<<-", + "nodeId": "built-in" + } + ] + ], + [ + "[[<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "[[<-", + "nodeId": "built-in" + } + ] + ], + [ + "[[<<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "[[<<-", + "nodeId": "built-in" + } + ] + ], + [ + "$<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "$<-", + "nodeId": "built-in" + } + ] + ], + [ + "$<<-", + [ + { + "kind": "built-in-function", + 
"definedAt": "built-in", + "name": "$<<-", + "nodeId": "built-in" + } + ] + ], + [ + "@<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "@<-", + "nodeId": "built-in" + } + ] + ], + [ + "@<<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "@<<-", + "nodeId": "built-in" + } + ] + ], + [ + "names<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "names<-", + "nodeId": "built-in" + } + ] + ], + [ + "names<<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "names<<-", + "nodeId": "built-in" + } + ] + ], + [ + "dimnames<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "dimnames<-", + "nodeId": "built-in" + } + ] + ], + [ + "dimnames<<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "dimnames<<-", + "nodeId": "built-in" + } + ] + ], + [ + "attributes<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "attributes<-", + "nodeId": "built-in" + } + ] + ], + [ + "attributes<<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "attributes<<-", + "nodeId": "built-in" + } + ] + ], + [ + "attr<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "attr<-", + "nodeId": "built-in" + } + ] + ], + [ + "attr<<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "attr<<-", + "nodeId": "built-in" + } + ] + ], + [ + "class<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "class<-", + "nodeId": "built-in" + } + ] + ], + [ + "class<<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "class<<-", + "nodeId": "built-in" + } + ] + ], + [ + "levels<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "levels<-", + "nodeId": "built-in" + } + ] + ], + [ + "levels<<-", + [ + { + "kind": "built-in-function", + "definedAt": 
"built-in", + "name": "levels<<-", + "nodeId": "built-in" + } + ] + ], + [ + "rownames<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "rownames<-", + "nodeId": "built-in" + } + ] + ], + [ + "rownames<<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "rownames<<-", + "nodeId": "built-in" + } + ] + ], + [ + "colnames<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "colnames<-", + "nodeId": "built-in" + } + ] + ], + [ + "colnames<<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "colnames<<-", + "nodeId": "built-in" + } + ] + ], + [ + "body<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "body<-", + "nodeId": "built-in" + } + ] + ], + [ + "body<<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "body<<-", + "nodeId": "built-in" + } + ] + ], + [ + "environment<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "environment<-", + "nodeId": "built-in" + } + ] + ], + [ + "environment<<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "environment<<-", + "nodeId": "built-in" + } + ] + ], + [ + "formals<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "formals<-", + "nodeId": "built-in" + } + ] + ], + [ + "formals<<-", + [ + { + "kind": "built-in-function", + "definedAt": "built-in", + "name": "formals<<-", + "nodeId": "built-in" + } + ] + ] + ] + }, + "memory": [ + [ + "x", + [ + { + "nodeId": 0, + "name": "x", + "kind": "variable", + "definedAt": 2 + } + ] + ] + ] + }, + "level": 0 + }, + "graph": { + "_idMap": { + "size": 13, + "k2v": [ + [ + 0, + { + "type": "RSymbol", + "location": [ + 1, + 1, + 1, + 1 + ], + "content": "x", + "lexeme": "x", + "info": { + "fullRange": [ + 1, + 1, + 1, + 1 + ], + "additionalTokens": [], + "fullLexeme": "x", + "id": 0, + "parent": 2, + "role": "binop-lhs", + 
"index": 0, + "nesting": 0 + } + } + ], + [ + 1, + { + "location": [ + 1, + 6, + 1, + 6 + ], + "lexeme": "1", + "info": { + "fullRange": [ + 1, + 6, + 1, + 6 + ], + "additionalTokens": [], + "fullLexeme": "1", + "id": 1, + "parent": 2, + "role": "binop-rhs", + "index": 1, + "nesting": 0 + }, + "type": "RNumber", + "content": { + "num": 1, + "complexNumber": false, + "markedAsInt": false + } + } + ], + [ + 2, + { + "type": "RBinaryOp", + "location": [ + 1, + 3, + 1, + 4 + ], + "lhs": { + "type": "RSymbol", + "location": [ + 1, + 1, + 1, + 1 + ], + "content": "x", + "lexeme": "x", + "info": { + "fullRange": [ + 1, + 1, + 1, + 1 + ], + "additionalTokens": [], + "fullLexeme": "x", + "id": 0, + "parent": 2, + "role": "binop-lhs", + "index": 0, + "nesting": 0 + } + }, + "rhs": { + "location": [ + 1, + 6, + 1, + 6 + ], + "lexeme": "1", + "info": { + "fullRange": [ + 1, + 6, + 1, + 6 + ], + "additionalTokens": [], + "fullLexeme": "1", + "id": 1, + "parent": 2, + "role": "binop-rhs", + "index": 1, + "nesting": 0 + }, + "type": "RNumber", + "content": { + "num": 1, + "complexNumber": false, + "markedAsInt": false + } + }, + "operator": "<-", + "lexeme": "<-", + "info": { + "fullRange": [ + 1, + 1, + 1, + 6 + ], + "additionalTokens": [], + "fullLexeme": "x <- 1", + "id": 2, + "parent": 6, + "nesting": 0, + "index": 0, + "role": "expr-list-child" + } + } + ], + [ + 3, + { + "type": "RSymbol", + "location": [ + 2, + 1, + 2, + 1 + ], + "content": "x", + "lexeme": "x", + "info": { + "fullRange": [ + 2, + 1, + 2, + 1 + ], + "additionalTokens": [], + "fullLexeme": "x", + "id": 3, + "parent": 5, + "role": "binop-lhs", + "index": 0, + "nesting": 0 + } + } + ], + [ + 4, + { + "location": [ + 2, + 5, + 2, + 5 + ], + "lexeme": "1", + "info": { + "fullRange": [ + 2, + 5, + 2, + 5 + ], + "additionalTokens": [], + "fullLexeme": "1", + "id": 4, + "parent": 5, + "role": "binop-rhs", + "index": 1, + "nesting": 0 + }, + "type": "RNumber", + "content": { + "num": 1, + "complexNumber": false, + 
"markedAsInt": false + } + } + ], + [ + 5, + { + "type": "RBinaryOp", + "location": [ + 2, + 3, + 2, + 3 + ], + "lhs": { + "type": "RSymbol", + "location": [ + 2, + 1, + 2, + 1 + ], + "content": "x", + "lexeme": "x", + "info": { + "fullRange": [ + 2, + 1, + 2, + 1 + ], + "additionalTokens": [], + "fullLexeme": "x", + "id": 3, + "parent": 5, + "role": "binop-lhs", + "index": 0, + "nesting": 0 + } + }, + "rhs": { + "location": [ + 2, + 5, + 2, + 5 + ], + "lexeme": "1", + "info": { + "fullRange": [ + 2, + 5, + 2, + 5 + ], + "additionalTokens": [], + "fullLexeme": "1", + "id": 4, + "parent": 5, + "role": "binop-rhs", + "index": 1, + "nesting": 0 + }, + "type": "RNumber", + "content": { + "num": 1, + "complexNumber": false, + "markedAsInt": false + } + }, + "operator": "+", + "lexeme": "+", + "info": { + "fullRange": [ + 2, + 1, + 2, + 5 + ], + "additionalTokens": [], + "fullLexeme": "x + 1", + "id": 5, + "parent": 6, + "nesting": 0, + "index": 1, + "role": "expr-list-child" + } + } + ], + [ + 6, + { + "type": "RExpressionList", + "children": [ + { + "type": "RBinaryOp", + "location": [ + 1, + 3, + 1, + 4 + ], + "lhs": { + "type": "RSymbol", + "location": [ + 1, + 1, + 1, + 1 + ], + "content": "x", + "lexeme": "x", + "info": { + "fullRange": [ + 1, + 1, + 1, + 1 + ], + "additionalTokens": [], + "fullLexeme": "x", + "id": 0, + "parent": 2, + "role": "binop-lhs", + "index": 0, + "nesting": 0 + } + }, + "rhs": { + "location": [ + 1, + 6, + 1, + 6 + ], + "lexeme": "1", + "info": { + "fullRange": [ + 1, + 6, + 1, + 6 + ], + "additionalTokens": [], + "fullLexeme": "1", + "id": 1, + "parent": 2, + "role": "binop-rhs", + "index": 1, + "nesting": 0 + }, + "type": "RNumber", + "content": { + "num": 1, + "complexNumber": false, + "markedAsInt": false + } + }, + "operator": "<-", + "lexeme": "<-", + "info": { + "fullRange": [ + 1, + 1, + 1, + 6 + ], + "additionalTokens": [], + "fullLexeme": "x <- 1", + "id": 2, + "parent": 6, + "nesting": 0, + "index": 0, + "role": 
"expr-list-child" + } + }, + { + "type": "RBinaryOp", + "location": [ + 2, + 3, + 2, + 3 + ], + "lhs": { + "type": "RSymbol", + "location": [ + 2, + 1, + 2, + 1 + ], + "content": "x", + "lexeme": "x", + "info": { + "fullRange": [ + 2, + 1, + 2, + 1 + ], + "additionalTokens": [], + "fullLexeme": "x", + "id": 3, + "parent": 5, + "role": "binop-lhs", + "index": 0, + "nesting": 0 + } + }, + "rhs": { + "location": [ + 2, + 5, + 2, + 5 + ], + "lexeme": "1", + "info": { + "fullRange": [ + 2, + 5, + 2, + 5 + ], + "additionalTokens": [], + "fullLexeme": "1", + "id": 4, + "parent": 5, + "role": "binop-rhs", + "index": 1, + "nesting": 0 + }, + "type": "RNumber", + "content": { + "num": 1, + "complexNumber": false, + "markedAsInt": false + } + }, + "operator": "+", + "lexeme": "+", + "info": { + "fullRange": [ + 2, + 1, + 2, + 5 + ], + "additionalTokens": [], + "fullLexeme": "x + 1", + "id": 5, + "parent": 6, + "nesting": 0, + "index": 1, + "role": "expr-list-child" + } + } + ], + "info": { + "additionalTokens": [], + "id": 6, + "nesting": 0, + "role": "root", + "index": 0 + } + } + ], + [ + "2-arg", + { + "type": "RBinaryOp", + "location": [ + 1, + 3, + 1, + 4 + ], + "lhs": { + "type": "RSymbol", + "location": [ + 1, + 1, + 1, + 1 + ], + "content": "x", + "lexeme": "x", + "info": { + "fullRange": [ + 1, + 1, + 1, + 1 + ], + "additionalTokens": [], + "fullLexeme": "x", + "id": 0, + "parent": 2, + "role": "binop-lhs", + "index": 0, + "nesting": 0 + } + }, + "rhs": { + "location": [ + 1, + 6, + 1, + 6 + ], + "lexeme": "1", + "info": { + "fullRange": [ + 1, + 6, + 1, + 6 + ], + "additionalTokens": [], + "fullLexeme": "1", + "id": 1, + "parent": 2, + "role": "binop-rhs", + "index": 1, + "nesting": 0 + }, + "type": "RNumber", + "content": { + "num": 1, + "complexNumber": false, + "markedAsInt": false + } + }, + "operator": "<-", + "lexeme": "<-", + "info": { + "fullRange": [ + 1, + 1, + 1, + 6 + ], + "additionalTokens": [], + "fullLexeme": "x <- 1", + "id": 2, + "parent": 6, + 
"nesting": 0, + "index": 0, + "role": "expr-list-child" + } + } + ], + [ + "5-arg", + { + "type": "RBinaryOp", + "location": [ + 2, + 3, + 2, + 3 + ], + "lhs": { + "type": "RSymbol", + "location": [ + 2, + 1, + 2, + 1 + ], + "content": "x", + "lexeme": "x", + "info": { + "fullRange": [ + 2, + 1, + 2, + 1 + ], + "additionalTokens": [], + "fullLexeme": "x", + "id": 3, + "parent": 5, + "role": "binop-lhs", + "index": 0, + "nesting": 0 + } + }, + "rhs": { + "location": [ + 2, + 5, + 2, + 5 + ], + "lexeme": "1", + "info": { + "fullRange": [ + 2, + 5, + 2, + 5 + ], + "additionalTokens": [], + "fullLexeme": "1", + "id": 4, + "parent": 5, + "role": "binop-rhs", + "index": 1, + "nesting": 0 + }, + "type": "RNumber", + "content": { + "num": 1, + "complexNumber": false, + "markedAsInt": false + } + }, + "operator": "+", + "lexeme": "+", + "info": { + "fullRange": [ + 2, + 1, + 2, + 5 + ], + "additionalTokens": [], + "fullLexeme": "x + 1", + "id": 5, + "parent": 6, + "nesting": 0, + "index": 1, + "role": "expr-list-child" + } + } + ], + [ + "0-arg", + { + "type": "RSymbol", + "location": [ + 1, + 1, + 1, + 1 + ], + "content": "x", + "lexeme": "x", + "info": { + "fullRange": [ + 1, + 1, + 1, + 1 + ], + "additionalTokens": [], + "fullLexeme": "x", + "id": 0, + "parent": 2, + "role": "binop-lhs", + "index": 0, + "nesting": 0 + } + } + ], + [ + "1-arg", + { + "location": [ + 1, + 6, + 1, + 6 + ], + "lexeme": "1", + "info": { + "fullRange": [ + 1, + 6, + 1, + 6 + ], + "additionalTokens": [], + "fullLexeme": "1", + "id": 1, + "parent": 2, + "role": "binop-rhs", + "index": 1, + "nesting": 0 + }, + "type": "RNumber", + "content": { + "num": 1, + "complexNumber": false, + "markedAsInt": false + } + } + ], + [ + "3-arg", + { + "type": "RSymbol", + "location": [ + 2, + 1, + 2, + 1 + ], + "content": "x", + "lexeme": "x", + "info": { + "fullRange": [ + 2, + 1, + 2, + 1 + ], + "additionalTokens": [], + "fullLexeme": "x", + "id": 3, + "parent": 5, + "role": "binop-lhs", + "index": 0, + 
"nesting": 0 + } + } + ], + [ + "4-arg", + { + "location": [ + 2, + 5, + 2, + 5 + ], + "lexeme": "1", + "info": { + "fullRange": [ + 2, + 5, + 2, + 5 + ], + "additionalTokens": [], + "fullLexeme": "1", + "id": 4, + "parent": 5, + "role": "binop-rhs", + "index": 1, + "nesting": 0 + }, + "type": "RNumber", + "content": { + "num": 1, + "complexNumber": false, + "markedAsInt": false + } + } + ] + ], + "v2k": {} + }, + "_unknownSideEffects": [], + "rootVertices": [ + 1, + 0, + 2, + 3, + 4, + 5 + ], + "vertexInformation": [ + [ + 1, + { + "tag": "value", + "id": 1 + } + ], + [ + 0, + { + "tag": "variable-definition", + "id": 0 + } + ], + [ + 2, + { + "tag": "function-call", + "id": 2, + "name": "<-", + "onlyBuiltin": true, + "args": [ + { + "nodeId": 0 + }, + { + "nodeId": 1 + } + ] + } + ], + [ + 3, + { + "tag": "use", + "id": 3 + } + ], + [ + 4, + { + "tag": "value", + "id": 4 + } + ], + [ + 5, + { + "tag": "function-call", + "id": 5, + "name": "+", + "onlyBuiltin": true, + "args": [ + { + "nodeId": 3 + }, + { + "nodeId": 4 + } + ] + } + ] + ], + "edgeInformation": [ + [ + 2, + [ + [ + 1, + { + "types": 64 + } + ], + [ + 0, + { + "types": 72 + } + ] + ] + ], + [ + 0, + [ + [ + 1, + { + "types": 2 + } + ], + [ + 2, + { + "types": 2 + } + ] + ] + ], + [ + 3, + [ + [ + 0, + { + "types": 1 + } + ] + ] + ], + [ + 5, + [ + [ + 3, + { + "types": 65 + } + ], + [ + 4, + { + "types": 65 + } + ] + ] + ] + ] + }, + "entryPoint": 2, + "exitPoints": [ + { + "type": 0, + "nodeId": 5 + } + ] +} +``` + +
+ +So let's start by looking at the properties of the dataflow information object: `unknownReferences`, `in`, `out`, `environment`, `graph`, `entryPoint`, `exitPoints`. + + + +There are three sets of references. +**in** (ids: [2,5]) and **out** (ids: [0]) contain the +ingoing and outgoing references of the subgraph at hand (in this case, the whole code, as we are at the end of the dataflow analysis). +Besides the Ids, they also contain important meta-information (e.g., what is to be read). +The third set, **unknownReferences**, contains all references that are not yet identified as read or written +(the example does not have any, but, for example, `x` (with id 0) would first be unknown and then later classified as a definition). + +The **environment** property contains the active environment information of the subgraph. +In other words, this is a linked list of tables (scopes), mapping identifiers to their respective definitions. +A summarized version of the produced environment looks like this: + +| Name | Definitions | +|------|-------------| +| `x` | {x (0, variable, def. @2)} | + +
Parent Environment + +_Built-in Environment (210 entries)_ + +
+ +This shows us that the local environment contains a single definition for `x` (with id 0) and that the parent environment is the built-in environment. +Additionally, we get the information that the node with the id 2 was responsible for the definition of `x`. + +Last but not least, the information contains the single **entry point** (2) and a set of **exit points** ([5]). +Besides marking potential exits, the exit points also provide information about why the exit occurs and which control dependencies affect the exit. + +### Unknown Side Effects + +In case _flowR_ encounters a function call that it cannot handle, it marks the call as an unknown side effect. +You can find these as part of the dataflow graph, specifically as `unknownSideEffects` (with a leading underscore if serialized as JSON). +In the following graph, _flowR_ realizes that it is unable to correctly handle the impacts of the `load` call and therefore marks it as such (marked in bright red): + + + + +```mermaid +flowchart LR + 1{{"`#91;RString#93; #34;file#34; + (1) + *1.6-11*`"}} + 3[["`#91;RFunctionCall#93; load + (3) + *1.1-12* + (1)`"]] + style 3 stroke:red,stroke-width:5px; + 5(["`#91;RSymbol#93; x + (5) + *2.7*`"]) + 6(["`#91;RSymbol#93; y + (6) + *2.11*`"]) + 7[["`#91;RBinaryOp#93; #43; + (7) + *2.7-11* + (5, 6)`"]] + 9[["`#91;RFunctionCall#93; print + (9) + *2.1-12* + (7)`"]] + 3 -->|"argument"| 1 + 7 -->|"reads, argument"| 5 + 7 -->|"reads, argument"| 6 + 9 -->|"reads, returns, argument"| 7 +``` + +
+ +R Code of the Dataflow Graph + +The analysis required _3.98 ms_ (including parsing and normalization) within the generation environment. + +We encountered unknown side effects (with ids: [3]) during the analysis. + +```r +load("file") +print(x + y) +``` + +
+ +Mermaid Code + +``` +flowchart LR + 1{{"`#91;RString#93; #34;file#34; + (1) + *1.6-11*`"}} + 3[["`#91;RFunctionCall#93; load + (3) + *1.1-12* + (1)`"]] + style 3 stroke:red,stroke-width:5px; + 5(["`#91;RSymbol#93; x + (5) + *2.7*`"]) + 6(["`#91;RSymbol#93; y + (6) + *2.11*`"]) + 7[["`#91;RBinaryOp#93; #43; + (7) + *2.7-11* + (5, 6)`"]] + 9[["`#91;RFunctionCall#93; print + (9) + *2.1-12* + (7)`"]] + 3 -->|"argument"| 1 + 7 -->|"reads, argument"| 5 + 7 -->|"reads, argument"| 6 + 9 -->|"reads, returns, argument"| 7 +``` + +
+ +
+ + + +In general, as we cannot handle these correctly, we leave it up to other analyses (and [queries](https://github.com/flowr-analysis/flowr/wiki//Query API)) to handle these cases +as they see fit. + + From 326b95e7f6b1c01b8783716a242074e4855e6f48 Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Mon, 23 Sep 2024 13:36:25 +0200 Subject: [PATCH 34/41] lint-fix: handle linter problems --- src/documentation/doc-util/doc-dfg.ts | 2 +- src/documentation/doc-util/doc-env.ts | 27 +++++------ .../print-dataflow-graph-wiki.ts | 46 ++++++++++--------- 3 files changed, 39 insertions(+), 36 deletions(-) diff --git a/src/documentation/doc-util/doc-dfg.ts b/src/documentation/doc-util/doc-dfg.ts index 35c1343616..3c0d3027fc 100644 --- a/src/documentation/doc-util/doc-dfg.ts +++ b/src/documentation/doc-util/doc-dfg.ts @@ -11,7 +11,7 @@ import type { DataflowDifferenceReport } from '../../dataflow/graph/diff'; import { diffOfDataflowGraphs } from '../../dataflow/graph/diff'; import { guard } from '../../util/assert'; import { printAsMs } from './doc-ms'; -import {jsonReplacer} from "../../util/json"; +import { jsonReplacer } from '../../util/json'; export function printDfGraph(graph: DataflowGraph, mark?: ReadonlySet) { return ` diff --git a/src/documentation/doc-util/doc-env.ts b/src/documentation/doc-util/doc-env.ts index 55d2c89226..219df94dd9 100644 --- a/src/documentation/doc-util/doc-env.ts +++ b/src/documentation/doc-util/doc-env.ts @@ -1,17 +1,18 @@ -import {BuiltInEnvironment, IEnvironment} from "../../dataflow/environments/environment"; -import {printIdentifier} from "../../util/mermaid/dfg"; +import type { IEnvironment } from '../../dataflow/environments/environment'; +import { BuiltInEnvironment } from '../../dataflow/environments/environment'; +import { printIdentifier } from '../../util/mermaid/dfg'; export function printEnvironmentToMarkdown(env: IEnvironment | undefined): string { - if(env === undefined) { - return '?? 
(error)'; - } else if(env.id === BuiltInEnvironment.id) { - return `_Built-in Environment (${env.memory.size} entries)_`; - } + if(env === undefined) { + return '?? (error)'; + } else if(env.id === BuiltInEnvironment.id) { + return `_Built-in Environment (${env.memory.size} entries)_`; + } - const lines = ['| Name | Definitions |', '|------|-------------|']; - for(const [name, defs] of env.memory.entries()) { - const printName = `\`${name}\``; - lines.push(`| ${printName} | {${defs.map(printIdentifier).join(', ')}} |`); - } - return lines.join('\n') + `\n\n
Parent Environment\n\n` + printEnvironmentToMarkdown(env.parent) + '\n\n
'; + const lines = ['| Name | Definitions |', '|------|-------------|']; + for(const [name, defs] of env.memory.entries()) { + const printName = `\`${name}\``; + lines.push(`| ${printName} | {${defs.map(printIdentifier).join(', ')}} |`); + } + return lines.join('\n') + '\n\n
Parent Environment\n\n' + printEnvironmentToMarkdown(env.parent) + '\n\n
'; } diff --git a/src/documentation/print-dataflow-graph-wiki.ts b/src/documentation/print-dataflow-graph-wiki.ts index c6da19e3e8..2e6eeaba93 100644 --- a/src/documentation/print-dataflow-graph-wiki.ts +++ b/src/documentation/print-dataflow-graph-wiki.ts @@ -1,5 +1,5 @@ import { DataflowGraph } from '../dataflow/graph/graph'; -import {MermaidMarkdownMark} from '../util/mermaid/dfg'; +import type { MermaidMarkdownMark } from '../util/mermaid/dfg'; import { RShell } from '../r-bridge/shell'; import { VertexType } from '../dataflow/graph/vertex'; import { EdgeType } from '../dataflow/graph/edge'; @@ -8,16 +8,16 @@ import { guard } from '../util/assert'; import { defaultEnv } from '../../test/functionality/_helper/dataflow/environment-builder'; import { setMinLevelOfAllLogs } from '../../test/functionality/_helper/log'; import { LogLevel } from '../util/log'; -import {printDfGraph, printDfGraphForCode, verifyExpectedSubgraph} from './doc-util/doc-dfg'; -import {FlowrWikiBaseRef, getFilePathMd} from './doc-util/doc-files'; +import { printDfGraph, printDfGraphForCode, verifyExpectedSubgraph } from './doc-util/doc-dfg'; +import { FlowrWikiBaseRef, getFilePathMd } from './doc-util/doc-files'; import { autoGenHeader } from './doc-util/doc-auto-gen'; import { nth } from '../util/text'; -import {PipelineExecutor} from "../core/pipeline-executor"; -import {DEFAULT_DATAFLOW_PIPELINE} from "../core/steps/pipeline/default-pipelines"; -import {requestFromInput} from "../r-bridge/retriever"; -import {PipelineOutput} from "../core/steps/pipeline/pipeline"; -import {jsonReplacer} from "../util/json"; -import {printEnvironmentToMarkdown} from "./doc-util/doc-env"; +import { PipelineExecutor } from '../core/pipeline-executor'; +import { DEFAULT_DATAFLOW_PIPELINE } from '../core/steps/pipeline/default-pipelines'; +import { requestFromInput } from '../r-bridge/retriever'; +import type { PipelineOutput } from '../core/steps/pipeline/pipeline'; +import { jsonReplacer } from 
'../util/json'; +import { printEnvironmentToMarkdown } from './doc-util/doc-env'; export interface SubExplanationParameters { readonly name: string, @@ -285,10 +285,10 @@ async function getEdgesExplanations(shell: RShell): Promise { } async function dummyDataflow(): Promise> { - const shell = new RShell() + const shell = new RShell(); const result = await new PipelineExecutor(DEFAULT_DATAFLOW_PIPELINE, { shell, - request: requestFromInput('x <- 1\nx + 1') + request: requestFromInput('x <- 1\nx + 1') }).allRemainingSteps(); shell.close(); return result; @@ -396,11 +396,11 @@ ${dummyDataflow.toString()} Now, you can find the dataflow _information_ with \`result.dataflow\`. More specifically, the graph is stored in \`result.dataflow.graph\` and looks like this: ${ -await (async () => { - const result = await dummyDataflow(); - const dfGraphString = printDfGraph(result.dataflow.graph) + await (async() => { + const result = await dummyDataflow(); + const dfGraphString = printDfGraph(result.dataflow.graph); - return ` + return ` ${dfGraphString} However, the dataflow information contains more, quite a lot of information in fact. @@ -417,7 +417,9 @@ ${JSON.stringify(result.dataflow, jsonReplacer, 2)} So let's start by looking at the properties of the dataflow information object: ${Object.keys(result.dataflow).map(k => `\`${k}\``).join(', ')}. -${ (() => { guard(Object.keys(result.dataflow).length === 7, () => 'Update Dataflow Documentation!'); return ''; })() } +${ (() => { + guard(Object.keys(result.dataflow).length === 7, () => 'Update Dataflow Documentation!'); return ''; + })() } There are three sets of references. 
**in** (ids: ${JSON.stringify(new Set(result.dataflow.in.map(n => n.nodeId)), jsonReplacer)}) and **out** (ids: ${JSON.stringify(new Set(result.dataflow.out.map(n => n.nodeId)), jsonReplacer)}) contain the @@ -432,16 +434,16 @@ A summarized version of the produced environment looks like this: ${ printEnvironmentToMarkdown(result.dataflow.environment.current) -} + } This shows us that the local environment contains a single definition for \`x\` (with id 0) and that the parent environment is the built-in environment. Additionally, we get the information that the node with the id 2 was responsible for the definition of \`x\`. Last but not least, the information contains the single **entry point** (${ JSON.stringify(result.dataflow.entryPoint) - }) and a set of **exit points** (${ - JSON.stringify(result.dataflow.exitPoints.map(e => e.nodeId)) - }). + }) and a set of **exit points** (${ + JSON.stringify(result.dataflow.exitPoints.map(e => e.nodeId)) + }). Besides marking potential exits, the exit points also provide information about why the exit occurs and which control dependencies affect the exit. ### Unknown Side Effects @@ -454,9 +456,9 @@ ${await printDfGraphForCode(shell,'load("file")\nprint(x + y)')} In general, as we cannot handle these correctly, we leave it up to other analyses (and [queries](${FlowrWikiBaseRef}/Query API)) to handle these cases as they see fit. 
- ` + `; -})() + })() } `; From ffd814551400f02f79dcc2e3e6516eacca6491dd Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Mon, 23 Sep 2024 13:38:42 +0200 Subject: [PATCH 35/41] feat-fix(dfg): link with space --- src/documentation/print-dataflow-graph-wiki.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/documentation/print-dataflow-graph-wiki.ts b/src/documentation/print-dataflow-graph-wiki.ts index 2e6eeaba93..c5b6971a14 100644 --- a/src/documentation/print-dataflow-graph-wiki.ts +++ b/src/documentation/print-dataflow-graph-wiki.ts @@ -454,7 +454,7 @@ In the following graph, _flowR_ realizes that it is unable to correctly handle t ${await printDfGraphForCode(shell,'load("file")\nprint(x + y)')} -In general, as we cannot handle these correctly, we leave it up to other analyses (and [queries](${FlowrWikiBaseRef}/Query API)) to handle these cases +In general, as we cannot handle these correctly, we leave it up to other analyses (and [queries](${FlowrWikiBaseRef}/Query%20API)) to handle these cases as they see fit. 
`; From cb5b5dc0a80857351c7bc76932a33d96a8c62383 Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Mon, 23 Sep 2024 14:38:43 +0200 Subject: [PATCH 36/41] feat-fix: remove sources of double ref introduction --- src/dataflow/internal/linker.ts | 1 + .../call/built-in/built-in-assignment.ts | 3 +- .../call/built-in/built-in-expression-list.ts | 7 +- .../call/built-in/built-in-for-loop.ts | 2 +- test/functionality/cli/server.spec.ts | 1 - .../dataflow-if-then-tests.ts | 4 +- wiki/Dataflow Graph.md | 73 +++++++------------ 7 files changed, 35 insertions(+), 56 deletions(-) diff --git a/src/dataflow/internal/linker.ts b/src/dataflow/internal/linker.ts index 22875e3b09..3be20963ea 100644 --- a/src/dataflow/internal/linker.ts +++ b/src/dataflow/internal/linker.ts @@ -49,6 +49,7 @@ export function findNonLocalReads(graph: DataflowGraph): IdentifierReference[] { nodeId: id, controlDependencies: undefined }); + break; } } } diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts index 92ed4fc571..62fad60904 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts @@ -279,7 +279,6 @@ function processAssignmentToSymbol({ // we drop the first arg which we use to pass along arguments :D const readFromSourceWritten = sourceArg.out.slice(1); const readTargets: readonly IdentifierReference[] = [{ nodeId: rootId, name: nameOfAssignmentFunction, controlDependencies: data.controlDependencies }, ...sourceArg.unknownReferences, ...sourceArg.in, ...targetArg.in.filter(i => i.nodeId !== target.info.id), ...readFromSourceWritten]; - const writeTargets = [...writeNodes, ...writeNodes, ...readFromSourceWritten]; information.environment = overwriteEnvironment(targetArg.environment, sourceArg.environment); @@ -299,6 +298,6 @@ function processAssignmentToSymbol({ 
unknownReferences: [], entryPoint: rootId, in: readTargets, - out: writeTargets + out: [...writeNodes, ...readFromSourceWritten] }; } diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts index 84a34fa8af..f9c9d78e68 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts @@ -35,8 +35,11 @@ function linkReadNameToWriteIfPossible(read: IdentifierReference, environments: // record if at least one has not been defined if(probableTarget === undefined || probableTarget.some(t => !listEnvironments.has(t.nodeId) || !happensInEveryBranch(t.controlDependencies))) { - if(remainingRead.has(readName)) { - remainingRead.get(readName)?.push(read); + const has = remainingRead.get(readName); + if(has) { + if(!has?.some(h => h.nodeId === read.nodeId && h.name === read.name && h.controlDependencies === read.controlDependencies)) { + has.push(read); + } } else { remainingRead.set(readName, [read]); } diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-for-loop.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-for-loop.ts index 27ce70e212..3ee1c9241b 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-for-loop.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-for-loop.ts @@ -91,7 +91,7 @@ export function processForLoop( return { unknownReferences: [], // we only want those not bound by a local variable - in: [{ nodeId: rootId, name: name.content, controlDependencies: originalDependency }, ...variable.in, ...vector.in, ...vector.unknownReferences, ...[...nameIdShares.values()].flat()], + in: [{ nodeId: rootId, name: name.content, controlDependencies: originalDependency }, ...vector.unknownReferences, ...[...nameIdShares.values()].flat()], out: 
outgoing, graph: nextGraph, entryPoint: name.info.id, diff --git a/test/functionality/cli/server.spec.ts b/test/functionality/cli/server.spec.ts index f9c14e8df3..c89fa32bce 100644 --- a/test/functionality/cli/server.spec.ts +++ b/test/functionality/cli/server.spec.ts @@ -140,7 +140,6 @@ describe('flowr', () => { const messages = socket.getMessages(['hello', 'response-file-analysis', 'response-query']); const response = messages[2] as QueryResponseMessage; - console.log(response.results); assert.exists(response.results['call-context'], 'Expected the query to return at least one result'); assert.exists(response.results['.meta'], 'Expected the query to return at least one result'); assert.equal(response.results['call-context']['kinds']['.']['subkinds']['.'].length, 1, 'We should find one call to print!'); diff --git a/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-if-then-tests.ts b/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-if-then-tests.ts index 8b29c6caa3..a818e87745 100644 --- a/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-if-then-tests.ts +++ b/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-if-then-tests.ts @@ -248,9 +248,9 @@ print(x)`, emptyGraph() .constant('1') .defineVariable('0', 'x', { definedBy: ['1', '2'] }) .constant('10') - .defineVariable('9', 'x', { definedBy: ['10', '11'], controlDependencies: [{ id: '27', when: true }] }) + .defineVariable('9', 'x', { definedBy: ['10', '11'], controlDependencies: [{ id: '19', when: true }, { id: '27', when: true }] }) .constant('16') - .defineVariable('15', 'x', { definedBy: ['16', '17'], controlDependencies: [{ id: '27', when: true }] }) + .defineVariable('15', 'x', { definedBy: ['16', '17'], controlDependencies: [{ id: '19', when: false }, { id: '27', when: true }] }) .constant('24') .defineVariable('23', 'x', { definedBy: ['24', '25'], controlDependencies: [{ id: '27', when: false }] }) 
); diff --git a/wiki/Dataflow Graph.md b/wiki/Dataflow Graph.md index 192586a121..75aebd2dc7 100644 --- a/wiki/Dataflow Graph.md +++ b/wiki/Dataflow Graph.md @@ -1,4 +1,4 @@ -_This document was generated automatically from '/home/happy-feet/git/phd/flowr-field/flowr/src/documentation/print-dataflow-graph-wiki.ts' on 2024-09-23, 11:35:04 UTC presenting an overview of flowR's dataflow graph (version: 2.0.25, using R version 4.4.1)._ +_This document was generated automatically from '/home/happy-feet/git/phd/flowr-field/flowr/src/documentation/print-dataflow-graph-wiki.ts' on 2024-09-23, 12:04:03 UTC presenting an overview of flowR's dataflow graph (version: 2.0.25, using R version 4.4.1)._ This page briefly summarizes flowR's dataflow graph, represented by DataflowGraph in [`./src/dataflow/graph/graph.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/dataflow/graph/graph.ts). In case you want to manually build such a graph (e.g., for testing), you can use the builder in [`./src/dataflow/graph/dataflowgraph-builder.ts`](https://github.com/flowr-analysis/flowr/tree/main/./src/dataflow/graph/dataflowgraph-builder.ts). @@ -61,8 +61,7 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _13.22 ms_ (including parsing and normalization) within the generation environment. - +The analysis required _12.58 ms_ (including parsing and normalization) within the generation environment. We encountered no unknown side effects during the analysis. ```r @@ -194,8 +193,7 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.35 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 0. +The analysis required _1.12 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0. We encountered no unknown side effects during the analysis. 
```r @@ -246,8 +244,7 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.19 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 0. +The analysis required _1.01 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0. We encountered no unknown side effects during the analysis. ```r @@ -298,8 +295,7 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.17 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 1. +The analysis required _1.03 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 1. We encountered no unknown side effects during the analysis. ```r @@ -357,8 +353,7 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _2.94 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 3. +The analysis required _2.13 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 3. We encountered no unknown side effects during the analysis. ```r @@ -428,8 +423,7 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.44 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 0. +The analysis required _1.00 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0. We encountered no unknown side effects during the analysis. 
```r @@ -499,8 +493,7 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.53 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 0. +The analysis required _1.55 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0. We encountered no unknown side effects during the analysis. ```r @@ -570,8 +563,7 @@ end R Code of the Dataflow Graph -The analysis required _1.20 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 2. +The analysis required _0.98 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 2. We encountered no unknown side effects during the analysis. ```r @@ -652,8 +644,7 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.82 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 4->0. +The analysis required _1.67 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 4->0. We encountered no unknown side effects during the analysis. ```r @@ -751,8 +742,7 @@ end R Code of the Dataflow Graph -The analysis required _2.14 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 7->0. +The analysis required _1.87 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 7->0. We encountered no unknown side effects during the analysis. 
```r @@ -854,8 +844,7 @@ end R Code of the Dataflow Graph -The analysis required _1.81 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 4->1. +The analysis required _2.39 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 4->1. We encountered no unknown side effects during the analysis. ```r @@ -947,8 +936,7 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.21 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 0->1, 0->2. +The analysis required _1.03 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0->1, 0->2. We encountered no unknown side effects during the analysis. ```r @@ -1031,8 +1019,7 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.77 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 0->4, 0->3, 1->3. +The analysis required _1.31 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0->4, 0->3, 1->3. We encountered no unknown side effects during the analysis. ```r @@ -1116,8 +1103,7 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.71 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 0->3. +The analysis required _1.02 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 0->3. 
We encountered no unknown side effects during the analysis. ```r @@ -1216,8 +1202,7 @@ end R Code of the Dataflow Graph -The analysis required _1.81 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 7->4. +The analysis required _1.62 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 7->4. We encountered no unknown side effects during the analysis. ```r @@ -1326,8 +1311,7 @@ end R Code of the Dataflow Graph -The analysis required _1.68 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 6->1. +The analysis required _1.48 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 6->1. We encountered no unknown side effects during the analysis. ```r @@ -1451,8 +1435,7 @@ end R Code of the Dataflow Graph -The analysis required _3.21 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 11->1, 1->11. +The analysis required _1.36 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 11->1, 1->11. We encountered no unknown side effects during the analysis. ```r @@ -1591,8 +1574,7 @@ end R Code of the Dataflow Graph -The analysis required _2.24 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 11->1, 1->11. +The analysis required _1.84 ms_ (including parsing and normalization) within the generation environment. 
The following marks are used in the graph to highlight sub-parts (uses ids): 11->1, 1->11. We encountered no unknown side effects during the analysis. ```r @@ -1697,8 +1679,7 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.64 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 5->1, 5->3. +The analysis required _0.97 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 5->1, 5->3. We encountered no unknown side effects during the analysis. ```r @@ -1806,8 +1787,7 @@ end R Code of the Dataflow Graph -The analysis required _2.90 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 3->10. +The analysis required _2.12 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 3->10. We encountered no unknown side effects during the analysis. ```r @@ -1912,8 +1892,7 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _0.92 ms_ (including parsing and normalization) within the generation environment. -The following marks are used in the graph to highlight sub-parts (uses ids): 3->1. +The analysis required _0.82 ms_ (including parsing and normalization) within the generation environment. The following marks are used in the graph to highlight sub-parts (uses ids): 3->1. We encountered no unknown side effects during the analysis. ```r @@ -1986,8 +1965,7 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _1.31 ms_ (including parsing and normalization) within the generation environment. - +The analysis required _2.09 ms_ (including parsing and normalization) within the generation environment. 
We encountered no unknown side effects during the analysis. ```r @@ -5476,8 +5454,7 @@ flowchart LR R Code of the Dataflow Graph -The analysis required _3.98 ms_ (including parsing and normalization) within the generation environment. - +The analysis required _2.20 ms_ (including parsing and normalization) within the generation environment. We encountered unknown side effects (with ids: [3]) during the analysis. ```r @@ -5525,7 +5502,7 @@ flowchart LR -In general, as we cannot handle these correctly, we leave it up to other analyses (and [queries](https://github.com/flowr-analysis/flowr/wiki//Query API)) to handle these cases +In general, as we cannot handle these correctly, we leave it up to other analyses (and [queries](https://github.com/flowr-analysis/flowr/wiki//Query%20API)) to handle these cases as they see fit. From 91d5eabdfc3bab0921bddf78551b306ae92a50f5 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Wed, 25 Sep 2024 22:39:20 +0200 Subject: [PATCH 37/41] wip: call space separation --- src/benchmark/stats/size-of.ts | 3 +- src/dataflow/environments/built-in-config.ts | 17 +++---- src/dataflow/environments/built-in.ts | 7 +-- src/dataflow/environments/clone.ts | 2 +- src/dataflow/environments/diff.ts | 4 +- src/dataflow/environments/environment.ts | 10 ++--- src/dataflow/environments/identifier.ts | 44 ++++++++++++++----- src/dataflow/environments/resolve-by-name.ts | 32 +++++++++++--- src/dataflow/graph/graph.ts | 3 +- src/dataflow/internal/linker.ts | 9 ++-- .../call/built-in/built-in-access.ts | 5 ++- .../functions/call/built-in/built-in-apply.ts | 2 + .../call/built-in/built-in-assignment.ts | 38 ++++++++++++---- .../call/built-in/built-in-expression-list.ts | 7 +-- .../call/built-in/built-in-for-loop.ts | 13 +++--- .../built-in/built-in-function-definition.ts | 4 +- .../call/built-in/built-in-if-then-else.ts | 3 +- .../functions/call/built-in/built-in-pipe.ts | 4 +- .../call/built-in/built-in-while-loop.ts | 8 ++-- 
.../internal/process/functions/call/common.ts | 11 ++--- .../functions/call/known-call-handling.ts | 3 +- .../functions/call/named-call-handling.ts | 7 +-- .../functions/call/unnamed-call-handling.ts | 3 +- .../process/functions/process-parameter.ts | 3 +- .../internal/process/process-symbol.ts | 3 +- .../internal/process/process-value.ts | 3 +- .../call-context-query-executor.ts | 11 ++--- src/r-bridge/data/data.ts | 6 +++ src/slicing/static/slice-call.ts | 8 ++-- src/util/mermaid/dfg.ts | 2 +- .../dataflow/environment-builder-printer.ts | 22 ++++++---- .../_helper/dataflow/environment-builder.ts | 19 +++++--- test/functionality/benchmark/slicer.spec.ts | 4 +- .../dataflow/environments/resolve-tests.ts | 23 ++++++++-- .../atomic/dataflow-atomic-tests.ts | 5 ++- .../atomic/dataflow-redefine-tests.ts | 5 ++- .../dataflow-if-then-tests.ts | 7 +-- .../expression-lists/dataflow-read-tests.ts | 3 +- .../functions/dataflow-function-call-tests.ts | 9 ++-- .../dataflow-function-definition-tests.ts | 23 +++++----- .../functions/dataflow-source-tests.ts | 3 +- .../static-program-slices/calls-tests.ts | 4 ++ 42 files changed, 261 insertions(+), 141 deletions(-) diff --git a/src/benchmark/stats/size-of.ts b/src/benchmark/stats/size-of.ts index 7de067343c..54892a35ef 100644 --- a/src/benchmark/stats/size-of.ts +++ b/src/benchmark/stats/size-of.ts @@ -4,6 +4,7 @@ import type { DataflowGraph } from '../../dataflow/graph/graph'; import type { DataflowGraphVertexInfo } from '../../dataflow/graph/vertex'; import { VertexType } from '../../dataflow/graph/vertex'; import type { Identifier, IdentifierDefinition } from '../../dataflow/environments/identifier'; +import { ReferenceType } from '../../dataflow/environments/identifier'; import sizeof from 'object-sizeof'; /* we have to kill all processors linked in the default environment as they cannot be serialized and they are shared anyway */ @@ -21,7 +22,7 @@ function killBuiltInEnv(env: IEnvironment | undefined): IEnvironment { const 
memory = new Map(); for(const [k, v] of env.memory) { - memory.set(k, v.filter(v => !v.kind.startsWith('built-in') && !('processor' in v))); + memory.set(k, v.filter(v => v.type !== ReferenceType.BuiltInFunction && v.type !== ReferenceType.BuiltInConstant && !('processor' in v))); } return { diff --git a/src/dataflow/environments/built-in-config.ts b/src/dataflow/environments/built-in-config.ts index 625f382458..bea30de274 100644 --- a/src/dataflow/environments/built-in-config.ts +++ b/src/dataflow/environments/built-in-config.ts @@ -1,12 +1,7 @@ -import type { - BuiltInMappingName, - ConfigOfBuiltInMappingName } from './built-in'; -import { - BuiltIn, - BuiltInMemory, BuiltInProcessorMapper, - EmptyBuiltInMemory -} from './built-in'; +import type { BuiltInMappingName, ConfigOfBuiltInMappingName } from './built-in'; +import { BuiltIn, BuiltInMemory, BuiltInProcessorMapper, EmptyBuiltInMemory } from './built-in'; import type { Identifier, IdentifierDefinition } from './identifier'; +import { ReferenceType } from './identifier'; import { guard } from '../../util/assert'; export interface BaseBuiltInDefinition { @@ -59,7 +54,7 @@ export type BuiltInDefinitions = readonly BuiltInDefinition[]; function registerBuiltInConstant({ names, value, assumePrimitive }: BuiltInConstantDefinition): void { for(const name of names) { const d: IdentifierDefinition[] = [{ - kind: 'built-in-value', + type: ReferenceType.BuiltInConstant, definedAt: BuiltIn, controlDependencies: undefined, value, @@ -81,7 +76,7 @@ export function registerBuiltInFunctions replacer(name, args, rootId, data, { makeMaybe: true, assignmentOperator: suffix }), name: effectiveName, diff --git a/src/dataflow/environments/built-in.ts b/src/dataflow/environments/built-in.ts index ede3269dd5..c76da4b669 100644 --- a/src/dataflow/environments/built-in.ts +++ b/src/dataflow/environments/built-in.ts @@ -10,6 +10,7 @@ import { processForLoop } from '../internal/process/functions/call/built-in/buil import { 
processRepeatLoop } from '../internal/process/functions/call/built-in/built-in-repeat-loop'; import { processWhileLoop } from '../internal/process/functions/call/built-in/built-in-while-loop'; import type { Identifier, IdentifierDefinition, IdentifierReference } from './identifier'; +import { ReferenceType } from './identifier'; import { guard } from '../../util/assert'; import { processReplacementFunction } from '../internal/process/functions/call/built-in/built-in-replacement'; import { processQuote } from '../internal/process/functions/call/built-in/built-in-quote'; @@ -46,13 +47,13 @@ export type BuiltInIdentifierProcessorWithConfig = ( ) => DataflowInformation export interface BuiltInIdentifierDefinition extends IdentifierReference { - kind: 'built-in-function' + type: ReferenceType.BuiltInFunction definedAt: typeof BuiltIn processor: BuiltInIdentifierProcessor } export interface BuiltInIdentifierConstant extends IdentifierReference { - kind: 'built-in-value' + type: ReferenceType.BuiltInConstant definedAt: typeof BuiltIn value: T } @@ -98,7 +99,7 @@ export function registerBuiltInFunctions processor(name, args, rootId, data, config), diff --git a/src/dataflow/environments/clone.ts b/src/dataflow/environments/clone.ts index 3bae7eb644..897c12ed0e 100644 --- a/src/dataflow/environments/clone.ts +++ b/src/dataflow/environments/clone.ts @@ -16,7 +16,7 @@ function cloneEnvironment(environment: IEnvironment | undefined, recurseParents: return BuiltInEnvironment; } /* make sure the clone has the same id */ - const clone = new Environment(recurseParents ? cloneEnvironment(environment.parent, recurseParents) : environment.parent, environment.id); + const clone = new Environment(recurseParents ? 
cloneEnvironment(environment.parent, recurseParents) : environment.parent); clone.memory = new Map(JSON.parse(JSON.stringify([...environment.memory])) as [Identifier, IdentifierDefinition[]][]); return clone; } diff --git a/src/dataflow/environments/diff.ts b/src/dataflow/environments/diff.ts index e557973d29..f9ed579f28 100644 --- a/src/dataflow/environments/diff.ts +++ b/src/dataflow/environments/diff.ts @@ -48,8 +48,8 @@ function diffMemory(a: IEnvironment, b if(aVal.definedAt !== bVal.definedAt) { info.report.addComment(`${info.position}Different definition ids (definedAt) for ${key} (${aVal.nodeId}). ${info.leftname}: ${aVal.definedAt} vs. ${info.rightname}: ${bVal.definedAt}`); } - if(aVal.kind !== bVal.kind) { - info.report.addComment(`${info.position}Different kinds for ${key} (${aVal.nodeId}). ${info.leftname}: ${aVal.kind} vs. ${info.rightname}: ${bVal.kind}`); + if(aVal.type !== bVal.type) { + info.report.addComment(`${info.position}Different types for ${key} (${aVal.nodeId}). ${info.leftname}: ${aVal.type} vs. 
${info.rightname}: ${bVal.type}`); } } } diff --git a/src/dataflow/environments/environment.ts b/src/dataflow/environments/environment.ts index 2c4869de86..1113837342 100644 --- a/src/dataflow/environments/environment.ts +++ b/src/dataflow/environments/environment.ts @@ -5,6 +5,7 @@ * @module */ import type { Identifier, IdentifierDefinition, IdentifierReference } from './identifier'; +import { ReferenceType } from './identifier'; import { BuiltInMemory, EmptyBuiltInMemory } from './built-in'; import type { DataflowGraph } from '../graph/graph'; import { resolveByName } from './resolve-by-name'; @@ -15,9 +16,9 @@ import { jsonReplacer } from '../../util/json'; export function makeReferenceMaybe(ref: IdentifierReference, graph: DataflowGraph, environments: REnvironmentInformation, includeDefs: boolean, defaultCd: ControlDependency | undefined = undefined): IdentifierReference { const node = graph.get(ref.nodeId, true); if(includeDefs) { - const definitions = ref.name ? resolveByName(ref.name, environments) : undefined; + const definitions = ref.name ? resolveByName(ref.name, environments, ref.type) : undefined; for(const definition of definitions ?? []) { - if(definition.kind !== 'built-in-function' && definition.kind !== 'built-in-value') { + if(definition.type !== ReferenceType.BuiltInFunction && definition.type !== ReferenceType.BuiltInConstant) { if(definition.controlDependencies && defaultCd && !definition.controlDependencies.find(c => c.id === defaultCd.id)) { definition.controlDependencies.push(defaultCd); } else { @@ -60,13 +61,12 @@ export interface IEnvironment { let environmentIdCounter = 0; export class Environment implements IEnvironment { - readonly id; + readonly id = environmentIdCounter++; parent: IEnvironment; memory: Map; - constructor(parent: IEnvironment, id?: number) { + constructor(parent: IEnvironment) { this.parent = parent; - this.id = id ?? 
environmentIdCounter++; this.memory = new Map(); } } diff --git a/src/dataflow/environments/identifier.ts b/src/dataflow/environments/identifier.ts index 3dc1e39994..21c8b803f8 100644 --- a/src/dataflow/environments/identifier.ts +++ b/src/dataflow/environments/identifier.ts @@ -4,16 +4,26 @@ import type { ControlDependency } from '../info'; export type Identifier = string & { __brand?: 'identifier' } -interface InGraphIdentifierDefinition extends IdentifierReference { - kind: 'function' | 'variable' | 'parameter' | 'argument' - /** The assignment (or whatever, like `assign` function call) node which ultimately defined this identifier */ - definedAt: NodeId +export const enum ReferenceType { + /** The identifier type is unknown */ + Unknown, + /** The identifier is defined by a function (includes built-in function) */ + Function, + /** The identifier is defined by a variable (includes parameter and argument) */ + Variable, + /** The identifier is defined by a constant (includes built-in constant) */ + Constant, + /** The identifier is defined by a parameter (which we know nothing about at the moment) */ + Parameter, + /** The identifier is defined by an argument (which we know nothing about at the moment) */ + Argument, + /** The identifier is defined by a built-in value/constant */ + BuiltInConstant, + /** The identifier is defined by a built-in function */ + BuiltInFunction } -/** - * Stores the definition of an identifier within an {@link IEnvironment} - */ -export type IdentifierDefinition = InGraphIdentifierDefinition | BuiltInIdentifierDefinition | BuiltInIdentifierConstant +export type InGraphReferenceType = Exclude /** * Something like `a` in `b <- a`. 
@@ -25,9 +35,23 @@ export interface IdentifierReference { readonly nodeId: NodeId /** Name the reference is identified by (e.g., the name of the variable), undefined if the reference is "artificial" (e.g., anonymous) */ readonly name: Identifier | undefined + /** Type of the reference to be resolved */ + readonly type: ReferenceType; /** - * If the reference is only effective if, e.g. an if-then-else condition is true, this references the root of the `if`. - * As a hackey intermediate solution (until we have pointer-analysis), an empty array may indicate a `maybe` which is due to pointer access (e.g., in `a[x] <- 3`). + * If the reference is only effective, if, for example, an if-then-else condition is true, this references the root of the `if`. + * As a hacky intermediate solution (until we have pointer-analysis), an empty array may indicate a `maybe` which is due to pointer access (e.g., in `a[x] <- 3`). */ controlDependencies: ControlDependency[] | undefined } + + +interface InGraphIdentifierDefinition extends IdentifierReference { + readonly type: InGraphReferenceType + /** The assignment (or whatever, like `assign` function call) node which ultimately defined this identifier */ + readonly definedAt: NodeId +} + +/** + * Stores the definition of an identifier within an {@link IEnvironment} + */ +export type IdentifierDefinition = InGraphIdentifierDefinition | BuiltInIdentifierDefinition | BuiltInIdentifierConstant diff --git a/src/dataflow/environments/resolve-by-name.ts b/src/dataflow/environments/resolve-by-name.ts index ba72e07865..79286b5aa3 100644 --- a/src/dataflow/environments/resolve-by-name.ts +++ b/src/dataflow/environments/resolve-by-name.ts @@ -1,29 +1,47 @@ import type { IEnvironment, REnvironmentInformation } from './environment'; -import { BuiltInEnvironment } from './environment'; +import { BuiltInEnvironment } from './environment'; import { Ternary } from '../../util/logic'; import type { Identifier, IdentifierDefinition } from './identifier'; 
+import { ReferenceType } from './identifier'; import { happensInEveryBranch } from '../info'; +const TargetTypePredicate = { + [ReferenceType.Unknown]: () => true, + [ReferenceType.Argument]: () => true, + [ReferenceType.Parameter]: () => true, + [ReferenceType.Variable]: t => t.type === ReferenceType.Variable || t.type === ReferenceType.Parameter || t.type === ReferenceType.Argument || t.type === ReferenceType.Unknown, + [ReferenceType.Function]: t => t.type === ReferenceType.Function || t.type === ReferenceType.BuiltInFunction || t.type === ReferenceType.Unknown, + [ReferenceType.Constant]: t => t.type === ReferenceType.Constant || t.type === ReferenceType.BuiltInConstant || t.type === ReferenceType.Unknown, + [ReferenceType.BuiltInConstant]: t => t.type === ReferenceType.BuiltInConstant || t.type === ReferenceType.Unknown, + [ReferenceType.BuiltInFunction]: t => t.type === ReferenceType.BuiltInFunction || t.type === ReferenceType.Unknown +} as const satisfies Record boolean>; + /** * Resolves a given identifier name to a list of its possible definition location using R scoping and resolving rules. * * @param name - The name of the identifier to resolve * @param environment - The current environment used for name resolution + * @param target - The target (meta) type of the identifier to resolve * * @returns A list of possible definitions of the identifier (one if the definition location is exactly and always known), or `undefined` if the identifier is undefined in the current scope/with the current environment information. 
*/ -export function resolveByName(name: Identifier, environment: REnvironmentInformation): IdentifierDefinition[] | undefined { +export function resolveByName(name: Identifier, environment: REnvironmentInformation, target: ReferenceType = ReferenceType.Unknown): IdentifierDefinition[] | undefined { let current: IEnvironment = environment.current; let definitions: IdentifierDefinition[] | undefined = undefined; + const wantedType = TargetTypePredicate[target]; + if(name === 'c') { + console.trace('resolve', name, target); + } do{ const definition = current.memory.get(name); if(definition !== undefined) { - if(definition.every(d => happensInEveryBranch(d.controlDependencies))) { + const filtered = definition.filter(wantedType); + if(filtered.length === definition.length && definition.every(d => happensInEveryBranch(d.controlDependencies))) { return definition; - } else { + } else if(filtered.length > 0) { definitions ??= []; - definitions.push(...definition); + definitions.push(...filtered); } } current = current.parent; @@ -41,7 +59,7 @@ export function resolvesToBuiltInConstant(name: Identifier | undefined, environm if(name === undefined) { return Ternary.Never; } - const definition = resolveByName(name, environment); + const definition = resolveByName(name, environment, ReferenceType.Constant); if(definition === undefined) { return Ternary.Never; @@ -50,7 +68,7 @@ export function resolvesToBuiltInConstant(name: Identifier | undefined, environm let all = true; let some = false; for(const def of definition) { - if(def.kind === 'built-in-value' && def.value === wantedValue) { + if(def.type === ReferenceType.BuiltInConstant && def.value === wantedValue) { some = true; } else { all = false; diff --git a/src/dataflow/graph/graph.ts b/src/dataflow/graph/graph.ts index e6bfa33d1f..9ace7c7b18 100644 --- a/src/dataflow/graph/graph.ts +++ b/src/dataflow/graph/graph.ts @@ -18,7 +18,7 @@ import type { IdentifierDefinition, IdentifierReference } from '../environments/ import 
type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id'; import { normalizeIdToNumberIfPossible } from '../../r-bridge/lang-4.x/ast/model/processing/node-id'; import type { REnvironmentInformation } from '../environments/environment'; -import { initializeCleanEnvironments } from '../environments/environment'; +import { initializeCleanEnvironments } from '../environments/environment'; import type { AstIdMap } from '../../r-bridge/lang-4.x/ast/model/processing/decorate'; import { cloneEnvironmentInformation } from '../environments/clone'; import { jsonReplacer } from '../../util/json'; @@ -412,7 +412,6 @@ export class DataflowGraph< function mergeNodeInfos(current: Vertex, next: Vertex): Vertex { guard(current.tag === next.tag, () => `nodes to be joined for the same id must have the same tag, but ${JSON.stringify(current, jsonReplacer)} vs ${JSON.stringify(next, jsonReplacer)}`); - guard(current.environment?.current.id === next.environment?.current.id, () => `nodes to be joined for the same id must have the same environment, but not for: ${JSON.stringify(current, jsonReplacer)} vs ${JSON.stringify(next, jsonReplacer)}`); if(current.tag === 'variable-definition') { guard(current.scope === next.scope, 'nodes to be joined for the same id must have the same scope'); diff --git a/src/dataflow/internal/linker.ts b/src/dataflow/internal/linker.ts index 3be20963ea..e7169d3e8a 100644 --- a/src/dataflow/internal/linker.ts +++ b/src/dataflow/internal/linker.ts @@ -4,6 +4,7 @@ import { expensiveTrace, log } from '../../util/log'; import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id'; import { recoverName } from '../../r-bridge/lang-4.x/ast/model/processing/node-id'; import type { IdentifierReference } from '../environments/identifier'; +import { ReferenceType } from '../environments/identifier'; import type { DataflowGraph, FunctionArgument } from '../graph/graph'; import { isNamedArgument } from '../graph/graph'; import type { 
RParameter } from '../../r-bridge/lang-4.x/ast/model/nodes/r-parameter'; @@ -41,13 +42,15 @@ export function findNonLocalReads(graph: DataflowGraph): IdentifierReference[] { for(const [target, { types }] of outgoing) { if(edgeIncludesType(types, EdgeType.Reads) && !ids.has(target)) { const name = recoverName(id, graph.idMap); + const origin = graph.getVertex(id, true); if(!name) { dataflowLogger.warn('found non-local read without name for id ' + id); } nonLocalReads.push({ name: recoverName(id, graph.idMap), nodeId: id, - controlDependencies: undefined + controlDependencies: undefined, + type: origin?.tag === VertexType.FunctionCall ? ReferenceType.Function : ReferenceType.Variable }); break; } @@ -140,7 +143,7 @@ export function linkFunctionCallWithSingleTarget( if(info.environment !== undefined) { // for each open ingoing reference, try to resolve it here, and if so, add a read edge from the call to signal that it reads it for(const ingoing of def.subflow.in) { - const defs = ingoing.name ? resolveByName(ingoing.name, info.environment) : undefined; + const defs = ingoing.name ? resolveByName(ingoing.name, info.environment, ingoing.type) : undefined; if(defs === undefined) { continue; } @@ -273,7 +276,7 @@ export function getAllLinkedFunctionDefinitions( */ export function linkInputs(referencesToLinkAgainstEnvironment: readonly IdentifierReference[], environmentInformation: REnvironmentInformation, givenInputs: IdentifierReference[], graph: DataflowGraph, maybeForRemaining: boolean): IdentifierReference[] { for(const bodyInput of referencesToLinkAgainstEnvironment) { - const probableTarget = bodyInput.name ? resolveByName(bodyInput.name, environmentInformation) : undefined; + const probableTarget = bodyInput.name ? 
resolveByName(bodyInput.name, environmentInformation, bodyInput.type) : undefined; if(probableTarget === undefined) { log.trace(`found no target for ${bodyInput.name}`); if(maybeForRemaining) { diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-access.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-access.ts index b50dd037a6..7424c5a63a 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-access.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-access.ts @@ -15,6 +15,7 @@ import { makeAllMaybe, makeReferenceMaybe } from '../../../../../environments/en import type { ForceArguments } from '../common'; import { BuiltIn } from '../../../../../environments/built-in'; import { markAsAssignment } from './built-in-assignment'; +import { ReferenceType } from '../../../../../environments/identifier'; interface TableAssignmentProcessorMarker { definitionRootNodes: NodeId[] @@ -52,7 +53,7 @@ export function processAccess( const existing = data.environment.current.memory.get(':='); const outInfo = { definitionRootNodes: [] }; data.environment.current.memory.set(':=', [{ - kind: 'built-in-function', + type: ReferenceType.BuiltInFunction, definedAt: BuiltIn, controlDependencies: undefined, processor: (name, args, rootId, data) => tableAssignmentProcessor(name, args, rootId, data, outInfo), @@ -66,7 +67,7 @@ export function processAccess( } if(head.value && outInfo.definitionRootNodes.length > 0) { markAsAssignment(fnCall.information, - { kind: 'variable', name: head.value.lexeme ?? '', nodeId: head.value.info.id, definedAt: rootId, controlDependencies: [] }, + { type: ReferenceType.Variable, name: head.value.lexeme ?? 
'', nodeId: head.value.info.id, definedAt: rootId, controlDependencies: [] }, outInfo.definitionRootNodes, rootId ); diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-apply.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-apply.ts index c82775112e..0c33c8f73b 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-apply.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-apply.ts @@ -12,6 +12,7 @@ import { RType } from '../../../../../../r-bridge/lang-4.x/ast/model/type'; import { VertexType } from '../../../../../graph/vertex'; import type { FunctionArgument } from '../../../../../graph/graph'; import { EdgeType } from '../../../../../graph/edge'; +import { ReferenceType } from '../../../../../environments/identifier'; export interface BuiltInApplyConfiguration extends MergeableRecord { /** the 0-based index of the argument which is the actual function passed, defaults to 1 */ @@ -65,6 +66,7 @@ export function processApply( return { name: counterpart.name.content, controlDependencies: data.controlDependencies, + type: ReferenceType.Argument, nodeId: arg.entryPoint }; } else { diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts index 62fad60904..4f2680de1e 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-assignment.ts @@ -6,14 +6,22 @@ import { log, LogLevel } from '../../../../../../util/log'; import { unpackArgument } from '../argument/unpack-argument'; import { processAsNamedCall } from '../../../process-named-call'; import { toUnnamedArgument } from '../argument/make-argument'; -import type { ParentInformation, RNodeWithParent } from '../../../../../../r-bridge/lang-4.x/ast/model/processing/decorate'; +import type { + ParentInformation, + RNodeWithParent 
+} from '../../../../../../r-bridge/lang-4.x/ast/model/processing/decorate'; import type { Base, Location, RNode } from '../../../../../../r-bridge/lang-4.x/ast/model/model'; import type { RSymbol } from '../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-symbol'; import { RType } from '../../../../../../r-bridge/lang-4.x/ast/model/type'; import type { RFunctionArgument } from '../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-function-call'; import type { NodeId } from '../../../../../../r-bridge/lang-4.x/ast/model/processing/node-id'; import { dataflowLogger } from '../../../../../logger'; -import type { IdentifierDefinition, IdentifierReference } from '../../../../../environments/identifier'; +import type { + IdentifierDefinition, + IdentifierReference, + InGraphReferenceType } from '../../../../../environments/identifier'; +import { ReferenceType +} from '../../../../../environments/identifier'; import { overwriteEnvironment } from '../../../../../environments/overwrite'; import type { RString } from '../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-string'; import { removeRQuotes } from '../../../../../../r-bridge/retriever'; @@ -148,10 +156,10 @@ function extractSourceAndTarget(args: readonly RFunctionArgument(rootId: NodeId, target: DataflowInformation, isFunctionDef: boolean, data: DataflowProcessorInformation, makeMaybe: boolean): IdentifierDefinition[] { +function produceWrittenNodes(rootId: NodeId, target: DataflowInformation, referenceType: InGraphReferenceType, data: DataflowProcessorInformation, makeMaybe: boolean): IdentifierDefinition[] { return [...target.in, ...target.unknownReferences].map(ref => ({ ...ref, - kind: isFunctionDef ? 'function' : 'variable', + type: referenceType, definedAt: rootId, controlDependencies: data.controlDependencies ?? (makeMaybe ? 
[] : undefined) })); @@ -201,8 +209,17 @@ function processAssignmentToString( }); } -function checkFunctionDef(source: RNode, sourceInfo: DataflowInformation) { - return sourceInfo.graph.getVertex(source.info.id)?.tag === VertexType.FunctionDefinition; +function checkTargetReferenceType(source: RNode, sourceInfo: DataflowInformation): InGraphReferenceType { + const vert = sourceInfo.graph.getVertex(source.info.id, true); + switch(vert?.tag) { + case VertexType.FunctionDefinition: + return ReferenceType.Function; + case VertexType.Use: + case VertexType.FunctionCall: + return ReferenceType.Unknown; + default: + return ReferenceType.Variable; + } } export interface AssignmentToSymbolParameters extends AssignmentConfiguration { @@ -268,9 +285,9 @@ function processAssignmentToSymbol({ makeMaybe, quoteSource }: AssignmentToSymbolParameters): DataflowInformation { - const isFunctionDef = checkFunctionDef(source, sourceArg); + const referenceType = checkTargetReferenceType(source, sourceArg); - const writeNodes = produceWrittenNodes(rootId, targetArg, isFunctionDef, data, makeMaybe ?? false); + const writeNodes = produceWrittenNodes(rootId, targetArg, referenceType, data, makeMaybe ?? 
false); if(writeNodes.length !== 1 && log.settings.minLevel <= LogLevel.Warn) { log.warn(`Unexpected write number in assignment: ${JSON.stringify(writeNodes)}`); @@ -278,7 +295,10 @@ function processAssignmentToSymbol({ // we drop the first arg which we use to pass along arguments :D const readFromSourceWritten = sourceArg.out.slice(1); - const readTargets: readonly IdentifierReference[] = [{ nodeId: rootId, name: nameOfAssignmentFunction, controlDependencies: data.controlDependencies }, ...sourceArg.unknownReferences, ...sourceArg.in, ...targetArg.in.filter(i => i.nodeId !== target.info.id), ...readFromSourceWritten]; + const readTargets: readonly IdentifierReference[] = [ + { nodeId: rootId, name: nameOfAssignmentFunction, controlDependencies: data.controlDependencies, type: ReferenceType.Function }, + ...sourceArg.unknownReferences, ...sourceArg.in, ...targetArg.in.filter(i => i.nodeId !== target.info.id), ...readFromSourceWritten + ]; information.environment = overwriteEnvironment(targetArg.environment, sourceArg.environment); diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts index f9c9d78e68..ccbff03782 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts @@ -3,7 +3,7 @@ * @module */ import type { DataflowInformation, ExitPoint } from '../../../../../info'; -import { happensInEveryBranch , addNonDefaultExitPoints, alwaysExits, ExitPointType } from '../../../../../info'; +import { addNonDefaultExitPoints, alwaysExits, ExitPointType, happensInEveryBranch } from '../../../../../info'; import type { DataflowProcessorInformation } from '../../../../../processor'; import { processDataflowFor } from '../../../../../processor'; import { linkFunctionCalls } from '../../../../linker'; @@ -15,6 +15,7 @@ import { 
makeAllMaybe } from '../../../../../environments/environment'; import type { NodeId } from '../../../../../../r-bridge/lang-4.x/ast/model/processing/node-id'; import { DataflowGraph } from '../../../../../graph/graph'; import type { IdentifierReference } from '../../../../../environments/identifier'; +import { ReferenceType } from '../../../../../environments/identifier'; import { resolveByName } from '../../../../../environments/resolve-by-name'; import { EdgeType } from '../../../../../graph/edge'; import type { DataflowGraphVertexInfo } from '../../../../../graph/vertex'; @@ -31,7 +32,7 @@ const dotDotDotAccess = /\.\.\d+/; function linkReadNameToWriteIfPossible(read: IdentifierReference, environments: REnvironmentInformation, listEnvironments: Set, remainingRead: Map, nextGraph: DataflowGraph) { const readName = read.name && dotDotDotAccess.test(read.name) ? '...' : read.name; - const probableTarget = readName ? resolveByName(readName, environments) : undefined; + const probableTarget = readName ? 
resolveByName(readName, environments, read.type) : undefined; // record if at least one has not been defined if(probableTarget === undefined || probableTarget.some(t => !listEnvironments.has(t.nodeId) || !happensInEveryBranch(t.controlDependencies))) { @@ -189,7 +190,7 @@ export function processExpressionList( const withGroup = rootNode?.grouping; if(withGroup) { - ingoing.push({ nodeId: rootId, name: name.content, controlDependencies: data.controlDependencies }); + ingoing.push({ nodeId: rootId, name: name.content, controlDependencies: data.controlDependencies, type: ReferenceType.Function }); patchFunctionCall({ nextGraph, rootId, diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-for-loop.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-for-loop.ts index 3ee1c9241b..4b0c2698bd 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-for-loop.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-for-loop.ts @@ -1,12 +1,8 @@ import type { DataflowProcessorInformation } from '../../../../../processor'; import { processDataflowFor } from '../../../../../processor'; import type { DataflowInformation } from '../../../../../info'; -import { filterOutLoopExitPoints, alwaysExits } from '../../../../../info'; -import { - findNonLocalReads, - linkCircularRedefinitionsWithinALoop, - produceNameSharedIdMap -} from '../../../../linker'; +import { alwaysExits, filterOutLoopExitPoints } from '../../../../../info'; +import { findNonLocalReads, linkCircularRedefinitionsWithinALoop, produceNameSharedIdMap } from '../../../../linker'; import { processKnownFunctionCall } from '../known-call-handling'; import { guard } from '../../../../../../util/assert'; import { patchFunctionCall } from '../common'; @@ -21,6 +17,7 @@ import { appendEnvironment } from '../../../../../environments/append'; import { makeAllMaybe } from '../../../../../environments/environment'; import { EdgeType } from 
'../../../../../graph/edge'; import type { RSymbol } from '../../../../../../r-bridge/lang-4.x/ast/model/nodes/r-symbol'; +import { ReferenceType } from '../../../../../environments/identifier'; export function processForLoop( name: RSymbol, @@ -53,7 +50,7 @@ export function processForLoop( const writtenVariable = [...variable.unknownReferences, ...variable.in]; for(const write of writtenVariable) { - headEnvironments = define({ ...write, definedAt: name.info.id, kind: 'variable' }, false, headEnvironments); + headEnvironments = define({ ...write, definedAt: name.info.id, type: ReferenceType.Variable }, false, headEnvironments); } data = { ...data, environment: headEnvironments }; @@ -91,7 +88,7 @@ export function processForLoop( return { unknownReferences: [], // we only want those not bound by a local variable - in: [{ nodeId: rootId, name: name.content, controlDependencies: originalDependency }, ...vector.unknownReferences, ...[...nameIdShares.values()].flat()], + in: [{ nodeId: rootId, name: name.content, controlDependencies: originalDependency, type: ReferenceType.Function }, ...vector.unknownReferences, ...[...nameIdShares.values()].flat()], out: outgoing, graph: nextGraph, entryPoint: name.info.id, diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-function-definition.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-function-definition.ts index 65fca3f7d5..407b02bd2d 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-function-definition.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-function-definition.ts @@ -147,7 +147,7 @@ function updateNestedFunctionClosures( const ingoingRefs = subflow.in; const remainingIn: IdentifierReference[] = []; for(const ingoing of ingoingRefs) { - const resolved = ingoing.name ? resolveByName(ingoing.name, outEnvironment) : undefined; + const resolved = ingoing.name ? 
resolveByName(ingoing.name, outEnvironment, ingoing.type) : undefined; if(resolved === undefined) { remainingIn.push(ingoing); continue; @@ -183,7 +183,7 @@ function findPromiseLinkagesForParameters(parameters: DataflowGraph, readInParam // first, we try to bind again within parameters - if we have it, fine const remainingRead: IdentifierReference[] = []; for(const read of readInParameters) { - const resolved = read.name ? resolveByName(read.name, parameterEnvs) : undefined; + const resolved = read.name ? resolveByName(read.name, parameterEnvs, read.type) : undefined; if(resolved !== undefined) { for(const ref of resolved) { parameters.addEdge(read, ref, { type: EdgeType.Reads }); diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-if-then-else.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-if-then-else.ts index 26baca41d1..a94bdd67d8 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-if-then-else.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-if-then-else.ts @@ -14,6 +14,7 @@ import { resolvesToBuiltInConstant } from '../../../../../environments/resolve-b import { EdgeType } from '../../../../../graph/edge'; import { appendEnvironment } from '../../../../../environments/append'; import type { IdentifierReference } from '../../../../../environments/identifier'; +import { ReferenceType } from '../../../../../environments/identifier'; import { makeAllMaybe } from '../../../../../environments/environment'; import { Ternary } from '../../../../../../util/logic'; @@ -118,7 +119,7 @@ export function processIfThenElse( return { unknownReferences: [], - in: [{ nodeId: rootId, name: name.content, controlDependencies: originalDependency }, ...ingoing], + in: [{ nodeId: rootId, name: name.content, controlDependencies: originalDependency, type: ReferenceType.Function }, ...ingoing], out: outgoing, exitPoints, entryPoint: rootId, diff --git 
a/src/dataflow/internal/process/functions/call/built-in/built-in-pipe.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-pipe.ts index a03ece6eb3..1bcf619c1e 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-pipe.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-pipe.ts @@ -11,6 +11,7 @@ import { dataflowLogger } from '../../../../../logger'; import { RType } from '../../../../../../r-bridge/lang-4.x/ast/model/type'; import { VertexType } from '../../../../../graph/vertex'; import { EdgeType } from '../../../../../graph/edge'; +import { ReferenceType } from '../../../../../environments/identifier'; export function processPipe( @@ -43,7 +44,8 @@ export function processPipe( functionCallNode.args.unshift({ name: undefined, nodeId: argId, - controlDependencies: data.controlDependencies + controlDependencies: data.controlDependencies, + type: ReferenceType.Function }); information.graph.addEdge(functionCallNode.id, argId, { type: EdgeType.Argument }); } diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-while-loop.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-while-loop.ts index 1b2266f6a6..cb2e50896a 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-while-loop.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-while-loop.ts @@ -1,9 +1,10 @@ import type { DataflowProcessorInformation } from '../../../../../processor'; import type { DataflowInformation } from '../../../../../info'; -import { alwaysExits , filterOutLoopExitPoints } from '../../../../../info'; +import { alwaysExits, filterOutLoopExitPoints } from '../../../../../info'; import { findNonLocalReads, - linkCircularRedefinitionsWithinALoop, linkInputs, + linkCircularRedefinitionsWithinALoop, + linkInputs, produceNameSharedIdMap } from '../../../../linker'; import { processKnownFunctionCall } from '../known-call-handling'; @@ -18,6 +19,7 @@ import { 
dataflowLogger } from '../../../../../logger'; import type { RNode } from '../../../../../../r-bridge/lang-4.x/ast/model/model'; import { makeAllMaybe } from '../../../../../environments/environment'; import { EdgeType } from '../../../../../graph/edge'; +import { ReferenceType } from '../../../../../environments/identifier'; export function processWhileLoop( name: RSymbol, @@ -71,7 +73,7 @@ export function processWhileLoop( return { unknownReferences: [], - in: [{ nodeId: name.info.id, name: name.lexeme, controlDependencies: originalDependency }, ...remainingInputs], + in: [{ nodeId: name.info.id, name: name.lexeme, controlDependencies: originalDependency, type: ReferenceType.Function }, ...remainingInputs], out: [...makeAllMaybe(body.out, information.graph, information.environment, true), ...condition.out], entryPoint: name.info.id, exitPoints: filterOutLoopExitPoints(body.exitPoints), diff --git a/src/dataflow/internal/process/functions/call/common.ts b/src/dataflow/internal/process/functions/call/common.ts index 54f0107e0a..29200bbe2f 100644 --- a/src/dataflow/internal/process/functions/call/common.ts +++ b/src/dataflow/internal/process/functions/call/common.ts @@ -10,6 +10,7 @@ import type { DataflowGraph, FunctionArgument } from '../../../../graph/graph'; import type { NodeId } from '../../../../../r-bridge/lang-4.x/ast/model/processing/node-id'; import type { REnvironmentInformation } from '../../../../environments/environment'; import type { IdentifierReference } from '../../../../environments/identifier'; +import { ReferenceType } from '../../../../environments/identifier'; import { overwriteEnvironment } from '../../../../environments/overwrite'; import { resolveByName } from '../../../../environments/resolve-by-name'; import { RType } from '../../../../../r-bridge/lang-4.x/ast/model/type'; @@ -66,7 +67,7 @@ function forceVertexArgumentValueReferences(rootId: NodeId, value: DataflowInfor // try to resolve them against the current environment for(const ref 
of [...value.in, ...containedSubflowIn.flatMap(n => n.subflow.in)]) { if(ref.name) { - const resolved = resolveByName(ref.name, env) ?? []; + const resolved = resolveByName(ref.name, env, ref.type) ?? []; for(const resolve of resolved) { graph.addEdge(ref.nodeId, resolve.nodeId, { type: EdgeType.Reads }); } @@ -104,7 +105,7 @@ export function processAllArguments( // resolve reads within argument, we resolve before adding the `processed.environment` to avoid cyclic dependencies for(const ingoing of [...processed.in, ...processed.unknownReferences]) { - const tryToResolve = ingoing.name ? resolveByName(ingoing.name, argEnv) : undefined; + const tryToResolve = ingoing.name ? resolveByName(ingoing.name, argEnv, ReferenceType.Unknown) : undefined; if(tryToResolve === undefined) { remainingReadInArgs.push(ingoing); } else { @@ -126,9 +127,9 @@ export function processAllArguments( finalGraph.mergeWith(processed.graph); if(arg.type !== RType.Argument || !arg.name) { - callArgs.push({ nodeId: processed.entryPoint, controlDependencies: undefined }); + callArgs.push({ nodeId: processed.entryPoint, controlDependencies: undefined, type: ReferenceType.Argument }); } else { - callArgs.push({ nodeId: processed.entryPoint, name: arg.name.content, controlDependencies: undefined }); + callArgs.push({ nodeId: processed.entryPoint, name: arg.name.content, controlDependencies: undefined, type: ReferenceType.Argument }); } finalGraph.addEdge(functionRootId, processed.entryPoint, { type: EdgeType.Argument }); @@ -155,7 +156,7 @@ export function patchFunctionCall( /* will be overwritten accordingly */ onlyBuiltin: false, controlDependencies: data.controlDependencies, - args: argumentProcessResult.map(arg => arg === undefined ? EmptyArgument : { nodeId: arg.entryPoint, controlDependencies: undefined, call: undefined }) + args: argumentProcessResult.map(arg => arg === undefined ? 
EmptyArgument : { nodeId: arg.entryPoint, controlDependencies: undefined, call: undefined, type: ReferenceType.Argument }), }); for(const arg of argumentProcessResult) { if(arg) { diff --git a/src/dataflow/internal/process/functions/call/known-call-handling.ts b/src/dataflow/internal/process/functions/call/known-call-handling.ts index a499fbe482..a6b2b9ed07 100644 --- a/src/dataflow/internal/process/functions/call/known-call-handling.ts +++ b/src/dataflow/internal/process/functions/call/known-call-handling.ts @@ -10,6 +10,7 @@ import type { RFunctionArgument } from '../../../../../r-bridge/lang-4.x/ast/mod import type { NodeId } from '../../../../../r-bridge/lang-4.x/ast/model/processing/node-id'; import type { RNode } from '../../../../../r-bridge/lang-4.x/ast/model/model'; import type { IdentifierReference } from '../../../../environments/identifier'; +import { ReferenceType } from '../../../../environments/identifier'; import { DataflowGraph } from '../../../../graph/graph'; import { EdgeType } from '../../../../graph/edge'; import { dataflowLogger } from '../../../../logger'; @@ -92,7 +93,7 @@ export function processKnownFunctionCall( } const inIds = remainingReadInArgs; - const fnRef = { nodeId: rootId, name: functionCallName, controlDependencies: data.controlDependencies, call: true as const }; + const fnRef: IdentifierReference = { nodeId: rootId, name: functionCallName, controlDependencies: data.controlDependencies, type: ReferenceType.Function }; inIds.push(fnRef); return { diff --git a/src/dataflow/internal/process/functions/call/named-call-handling.ts b/src/dataflow/internal/process/functions/call/named-call-handling.ts index 0dd8ed014f..0378c08eab 100644 --- a/src/dataflow/internal/process/functions/call/named-call-handling.ts +++ b/src/dataflow/internal/process/functions/call/named-call-handling.ts @@ -9,6 +9,7 @@ import type { RSymbol } from '../../../../../r-bridge/lang-4.x/ast/model/nodes/r import type { NodeId } from 
'../../../../../r-bridge/lang-4.x/ast/model/processing/node-id'; import { resolveByName } from '../../../../environments/resolve-by-name'; import { VertexType } from '../../../../graph/vertex'; +import { ReferenceType } from '../../../../environments/identifier'; function mergeInformation(info: DataflowInformation | undefined, newInfo: DataflowInformation): DataflowInformation { @@ -34,7 +35,7 @@ function processDefaultFunctionProcessor( rootId: NodeId, data: DataflowProcessorInformation ) { - const resolve = resolveByName(name.content, data.environment); + const resolve = resolveByName(name.content, data.environment, ReferenceType.Function); /* if we do not know where we land, we force! */ const call = processKnownFunctionCall({ name, args, rootId, data, forceArgs: (resolve?.length ?? 0) > 0 ? undefined : 'all' }); return mergeInformation(information, call.information); @@ -46,14 +47,14 @@ export function processNamedCall( rootId: NodeId, data: DataflowProcessorInformation ): DataflowInformation { - const resolved = resolveByName(name.content, data.environment) ?? []; + const resolved = resolveByName(name.content, data.environment, ReferenceType.Function) ?? 
[]; let defaultProcessor = resolved.length === 0; let information: DataflowInformation | undefined = undefined; let builtIn = false; for(const resolvedFunction of resolved) { - if(resolvedFunction.kind === 'built-in-function') { + if(resolvedFunction.type === ReferenceType.BuiltInFunction) { builtIn = true; information = mergeInformation(information, resolvedFunction.processor(name, args, rootId, data)); } else { diff --git a/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts b/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts index 9d04c30a4d..91d33866c5 100644 --- a/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts +++ b/src/dataflow/internal/process/functions/call/unnamed-call-handling.ts @@ -10,6 +10,7 @@ import { DataflowGraph } from '../../../../graph/graph'; import { VertexType } from '../../../../graph/vertex'; import { RType } from '../../../../../r-bridge/lang-4.x/ast/model/type'; import { dataflowLogger } from '../../../../logger'; +import { ReferenceType } from '../../../../environments/identifier'; export const UnnamedFunctionCallPrefix = 'unnamed-function-call-'; @@ -52,7 +53,7 @@ export function processUnnamedFunctionCall(functionCall: RUnnamedFunc }); const inIds = remainingReadInArgs; - inIds.push({ nodeId: functionRootId, name: functionCallName, controlDependencies: data.controlDependencies }); + inIds.push({ nodeId: functionRootId, name: functionCallName, controlDependencies: data.controlDependencies, type: ReferenceType.Function }); if(functionCall.calledFunction.type === RType.FunctionDefinition) { linkArgumentsOnCall(callArgs, functionCall.calledFunction.parameters, finalGraph); diff --git a/src/dataflow/internal/process/functions/process-parameter.ts b/src/dataflow/internal/process/functions/process-parameter.ts index 2eb62f8d55..2daabc9704 100644 --- a/src/dataflow/internal/process/functions/process-parameter.ts +++ b/src/dataflow/internal/process/functions/process-parameter.ts @@ 
-6,6 +6,7 @@ import { log } from '../../../../util/log'; import type { RParameter } from '../../../../r-bridge/lang-4.x/ast/model/nodes/r-parameter'; import type { ParentInformation } from '../../../../r-bridge/lang-4.x/ast/model/processing/decorate'; import type { IdentifierDefinition } from '../../../environments/identifier'; +import { ReferenceType } from '../../../environments/identifier'; import { define } from '../../../environments/define'; import { RType } from '../../../../r-bridge/lang-4.x/ast/model/type'; import { EdgeType } from '../../../graph/edge'; @@ -17,7 +18,7 @@ export function processFunctionParameter(parameter: RParameter ({ ...n, - kind: 'parameter', + type: ReferenceType.Parameter, definedAt: parameter.info.id })); diff --git a/src/dataflow/internal/process/process-symbol.ts b/src/dataflow/internal/process/process-symbol.ts index 752e2abf10..eb5fadc985 100644 --- a/src/dataflow/internal/process/process-symbol.ts +++ b/src/dataflow/internal/process/process-symbol.ts @@ -6,6 +6,7 @@ import type { ParentInformation } from '../../../r-bridge/lang-4.x/ast/model/pro import { RNa, RNull } from '../../../r-bridge/lang-4.x/convert-values'; import { DataflowGraph } from '../../graph/graph'; import { VertexType } from '../../graph/vertex'; +import { ReferenceType } from '../../environments/identifier'; export function processSymbol(symbol: RSymbol, data: DataflowProcessorInformation): DataflowInformation { if(symbol.content === RNull || symbol.content === RNa) { @@ -13,7 +14,7 @@ export function processSymbol(symbol: RSymbol(value: RNodeWithParent, data: DataflowProcessorInformation): DataflowInformation { return { unknownReferences: [], - in: [{ nodeId: value.info.id, name: undefined, controlDependencies: data.controlDependencies }], + in: [{ nodeId: value.info.id, name: undefined, controlDependencies: data.controlDependencies, type: ReferenceType.Constant }], out: [], environment: data.environment, graph: new 
DataflowGraph(data.completeAst.idMap).addVertex({ diff --git a/src/queries/call-context-query/call-context-query-executor.ts b/src/queries/call-context-query/call-context-query-executor.ts index fa3f6aee58..71d9bb4b8f 100644 --- a/src/queries/call-context-query/call-context-query-executor.ts +++ b/src/queries/call-context-query/call-context-query-executor.ts @@ -2,11 +2,11 @@ import type { DataflowGraph } from '../../dataflow/graph/graph'; import type { CallContextQuery, CallContextQueryKindResult, - CallContextQueryResult, CallContextQuerySubKindResult, + CallContextQueryResult, + CallContextQuerySubKindResult, SubCallContextQueryFormat } from './call-context-query-format'; -import { CallTargets -} from './call-context-query-format'; +import { CallTargets } from './call-context-query-format'; import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id'; import { VertexType } from '../../dataflow/graph/vertex'; import { assertUnreachable } from '../../util/assert'; @@ -14,11 +14,12 @@ import { edgeIncludesType, EdgeType } from '../../dataflow/graph/edge'; import { resolveByName } from '../../dataflow/environments/resolve-by-name'; import { BuiltIn } from '../../dataflow/environments/built-in'; import type { ControlFlowGraph } from '../../util/cfg/cfg'; -import { extractCFG } from '../../util/cfg/cfg'; +import { extractCFG } from '../../util/cfg/cfg'; import { TwoLayerCollector } from '../two-layer-collector'; import type { BasicQueryData } from '../query'; import { compactRecord } from '../../util/objects'; import { visitInReverseOrder } from '../../util/cfg/visitor'; +import { ReferenceType } from '../../dataflow/environments/identifier'; function satisfiesCallTargets(id: NodeId, graph: DataflowGraph, callTarget: CallTargets): NodeId[] | 'no' { const callVertex = graph.get(id); @@ -43,7 +44,7 @@ function satisfiesCallTargets(id: NodeId, graph: DataflowGraph, callTarget: Call * for performance and scoping reasons, flowR will not identify the 
global linkage, * including any potential built-in mapping. */ - const reResolved = resolveByName(info.name, info.environment); + const reResolved = resolveByName(info.name, info.environment, ReferenceType.Unknown); if(reResolved && reResolved.some(t => t.definedAt === BuiltIn)) { builtIn = true; } diff --git a/src/r-bridge/data/data.ts b/src/r-bridge/data/data.ts index b30f3e7960..92e1e2bf91 100644 --- a/src/r-bridge/data/data.ts +++ b/src/r-bridge/data/data.ts @@ -67,6 +67,12 @@ export const flowrCapabilities = { supported: 'not', description: '_Handling side-effects by environments which are not copied when modified_' }, + { + name: 'Search Type', + id: 'search-type', + supported: 'fully', + description: '_Separating the resolution for functions and symbols._' + }, { name: 'Search Path', id: 'search-path', diff --git a/src/slicing/static/slice-call.ts b/src/slicing/static/slice-call.ts index c707c46156..b4493aeb4e 100644 --- a/src/slicing/static/slice-call.ts +++ b/src/slicing/static/slice-call.ts @@ -6,7 +6,8 @@ import { envFingerprint } from './fingerprint'; import { getAllLinkedFunctionDefinitions } from '../../dataflow/internal/linker'; import type { DataflowGraphVertexFunctionCall, - DataflowGraphVertexFunctionDefinition, DataflowGraphVertexInfo + DataflowGraphVertexFunctionDefinition, + DataflowGraphVertexInfo } from '../../dataflow/graph/vertex'; import type { REnvironmentInformation } from '../../dataflow/environments/environment'; import { initializeCleanEnvironments } from '../../dataflow/environments/environment'; @@ -18,6 +19,7 @@ import { BuiltIn } from '../../dataflow/environments/built-in'; import { resolveByName } from '../../dataflow/environments/resolve-by-name'; import { edgeIncludesType, EdgeType } from '../../dataflow/graph/edge'; import type { NodeId } from '../../r-bridge/lang-4.x/ast/model/processing/node-id'; +import { ReferenceType } from '../../dataflow/environments/identifier'; function retrieveActiveEnvironment(callerInfo: 
DataflowGraphVertexFunctionCall, baseEnvironment: REnvironmentInformation): REnvironmentInformation { let callerEnvironment = callerInfo.environment; @@ -65,7 +67,7 @@ function linkCallTargets( for(const functionCallTarget of functionCallTargets) { // all those linked within the scopes of other functions are already linked when exiting a function definition for(const openIn of (functionCallTarget as DataflowGraphVertexFunctionDefinition).subflow.in) { - const defs = openIn.name ? resolveByName(openIn.name, activeEnvironment) : undefined; + const defs = openIn.name ? resolveByName(openIn.name, activeEnvironment, openIn.type) : undefined; if(defs === undefined) { continue; } @@ -95,7 +97,7 @@ export function sliceForCall(current: NodeToSlice, callerInfo: DataflowGraphVert const name = callerInfo.name; guard(name !== undefined, () => `name of id: ${callerInfo.id} can not be found in id map`); - const functionCallDefs = resolveByName(name, activeEnvironment)?.filter(d => d.definedAt !== BuiltIn)?.map(d => d.nodeId) ?? []; + const functionCallDefs = resolveByName(name, activeEnvironment, ReferenceType.Unknown)?.filter(d => d.definedAt !== BuiltIn)?.map(d => d.nodeId) ?? []; for(const [target, outgoingEdge] of outgoingEdges[1].entries()) { if(edgeIncludesType(outgoingEdge.types, EdgeType.Calls)) { diff --git a/src/util/mermaid/dfg.ts b/src/util/mermaid/dfg.ts index eebd972802..454dcb85d1 100644 --- a/src/util/mermaid/dfg.ts +++ b/src/util/mermaid/dfg.ts @@ -132,7 +132,7 @@ function mermaidNodeBrackets(tag: DataflowGraphVertexInfo['tag']): { open: strin } export function printIdentifier(id: IdentifierDefinition): string { - return `${id.name} (${id.nodeId}, ${id.kind},${id.controlDependencies? ' {' + id.controlDependencies.map(c => c.id + (c.when ? '+' : '-')).join(',') + '},' : ''} def. @${id.definedAt})`; + return `${id.name} (${id.nodeId}, ${id.type},${id.controlDependencies? ' {' + id.controlDependencies.map(c => c.id + (c.when ? 
'+' : '-')).join(',') + '},' : ''} def. @${id.definedAt})`; } function printEnvironmentToLines(env: IEnvironment | undefined): string[] { diff --git a/test/functionality/_helper/dataflow/environment-builder-printer.ts b/test/functionality/_helper/dataflow/environment-builder-printer.ts index f6327fc35e..a0c248650e 100644 --- a/test/functionality/_helper/dataflow/environment-builder-printer.ts +++ b/test/functionality/_helper/dataflow/environment-builder-printer.ts @@ -3,6 +3,7 @@ import { wrap, wrapControlDependencies } from './printer'; import type { IEnvironment, REnvironmentInformation } from '../../../../src/dataflow/environments/environment'; import { BuiltInEnvironment } from '../../../../src/dataflow/environments/environment'; import type { IdentifierDefinition } from '../../../../src/dataflow/environments/identifier'; +import { ReferenceType } from '../../../../src/dataflow/environments/identifier'; export class EnvironmentBuilderPrinter { private env: REnvironmentInformation; @@ -33,9 +34,10 @@ export class EnvironmentBuilderPrinter { } private processDefinition(name: string, def: IdentifierDefinition) { - const kind = def.kind; - switch(kind) { - case 'variable': + const { type } = def; + switch(type) { + case ReferenceType.Unknown: + case ReferenceType.Variable: this.recordFnCall('defineVariable', [ wrap(name), wrap(def.nodeId), @@ -43,7 +45,7 @@ export class EnvironmentBuilderPrinter { this.getControlDependencyArgument(def) ]); break; - case 'function': + case ReferenceType.Function: this.recordFnCall('defineFunction', [ wrap(name), wrap(def.nodeId), @@ -51,11 +53,13 @@ export class EnvironmentBuilderPrinter { this.getControlDependencyArgument(def) ]); break; - case 'built-in-value': - case 'built-in-function': + /* shouldn't happen here :D */ + case ReferenceType.Constant: + case ReferenceType.BuiltInFunction: + case ReferenceType.BuiltInConstant: /* shouldn't happen, only we can define built-in stuff */ break; - case 'argument': + case 
ReferenceType.Argument: this.recordFnCall('defineArgument', [ wrap(name), wrap(def.nodeId), @@ -63,7 +67,7 @@ export class EnvironmentBuilderPrinter { this.getControlDependencyArgument(def) ]); break; - case 'parameter': + case ReferenceType.Parameter: this.recordFnCall('defineParameter', [ wrap(name), wrap(def.nodeId), @@ -72,7 +76,7 @@ export class EnvironmentBuilderPrinter { ]); break; default: - assertUnreachable(kind); + assertUnreachable(type); } } diff --git a/test/functionality/_helper/dataflow/environment-builder.ts b/test/functionality/_helper/dataflow/environment-builder.ts index 208e8dc521..91a6bf09a7 100644 --- a/test/functionality/_helper/dataflow/environment-builder.ts +++ b/test/functionality/_helper/dataflow/environment-builder.ts @@ -1,8 +1,9 @@ import type { NodeId } from '../../../../src/r-bridge/lang-4.x/ast/model/processing/node-id'; import { normalizeIdToNumberIfPossible } from '../../../../src/r-bridge/lang-4.x/ast/model/processing/node-id'; import type { IdentifierDefinition } from '../../../../src/dataflow/environments/identifier'; +import { ReferenceType } from '../../../../src/dataflow/environments/identifier'; import type { FunctionArgument } from '../../../../src/dataflow/graph/graph'; -import type { REnvironmentInformation , Environment } from '../../../../src/dataflow/environments/environment'; +import type { Environment, REnvironmentInformation } from '../../../../src/dataflow/environments/environment'; import { initializeCleanEnvironments } from '../../../../src/dataflow/environments/environment'; import { define } from '../../../../src/dataflow/environments/define'; import { popLocalEnvironment, pushLocalEnvironment } from '../../../../src/dataflow/environments/scoping'; @@ -10,7 +11,11 @@ import { appendEnvironment } from '../../../../src/dataflow/environments/append' import type { ControlDependency } from '../../../../src/dataflow/info'; export function variable(name: string, definedAt: NodeId): IdentifierDefinition { - return { 
name, kind: 'variable', nodeId: '_0', definedAt, controlDependencies: undefined }; + return { name, type: ReferenceType.Variable, nodeId: '_0', definedAt, controlDependencies: undefined }; +} + +export function asFunction(name: string, definedAt: NodeId): IdentifierDefinition { + return { name, type: ReferenceType.Function, nodeId: '_0', definedAt, controlDependencies: undefined }; } /** @@ -19,7 +24,7 @@ export function variable(name: string, definedAt: NodeId): IdentifierDefinition * @param options - optional allows to give further options */ export function argumentInCall(nodeId: NodeId, options?: { name?: string, controlDependencies?: ControlDependency[] }): FunctionArgument { - return { nodeId: normalizeIdToNumberIfPossible(nodeId), name: options?.name, controlDependencies: options?.controlDependencies?.map(c => ({ ...c, id: normalizeIdToNumberIfPossible(c.id) })) }; + return { nodeId: normalizeIdToNumberIfPossible(nodeId), type: ReferenceType.Argument, name: options?.name, controlDependencies: options?.controlDependencies?.map(c => ({ ...c, id: normalizeIdToNumberIfPossible(c.id) })) }; } /** * The constant global environment with all pre-defined functions. 
@@ -56,7 +61,7 @@ export class EnvironmentBuilder implements REnvironmentInformation { */ defineArgument(name: string, nodeId: NodeId, definedAt: NodeId, controlDependencies: ControlDependency[] | undefined = undefined) { return this.defineInEnv({ - kind: 'argument', + type: ReferenceType.Argument, name, definedAt, nodeId, @@ -72,7 +77,7 @@ export class EnvironmentBuilder implements REnvironmentInformation { */ defineFunction(name: string, nodeId: NodeId, definedAt: NodeId, controlDependencies: ControlDependency[] | undefined = undefined) { return this.defineInEnv({ - kind: 'function', + type: ReferenceType.Function, name, definedAt, nodeId, @@ -89,7 +94,7 @@ export class EnvironmentBuilder implements REnvironmentInformation { * */ defineParameter(name: string, nodeId: NodeId, definedAt: NodeId, controlDependencies: ControlDependency[] | undefined = undefined) { return this.defineInEnv({ - kind: 'parameter', + type: ReferenceType.Parameter, name, definedAt, nodeId, @@ -106,7 +111,7 @@ export class EnvironmentBuilder implements REnvironmentInformation { */ defineVariable(name: string, nodeId: NodeId, definedAt: NodeId = nodeId, controlDependencies: ControlDependency[] | undefined = undefined) { return this.defineInEnv({ - kind: 'variable', + type: ReferenceType.Variable, name, definedAt, nodeId, diff --git a/test/functionality/benchmark/slicer.spec.ts b/test/functionality/benchmark/slicer.spec.ts index 3ce6c318ea..5dcebdad0b 100644 --- a/test/functionality/benchmark/slicer.spec.ts +++ b/test/functionality/benchmark/slicer.spec.ts @@ -51,7 +51,7 @@ describe('Benchmark Slicer', () => { numberOfEdges: 4, // the defined-by edge and the arguments numberOfCalls: 1, // `<-` numberOfFunctionDefinitions: 0, // no definitions - sizeOfObject: 380 + sizeOfObject: 398 }, statInfo); assert.strictEqual(stats.perSliceMeasurements.numberOfSlices, 1, `sliced only once ${statInfo}`); @@ -118,7 +118,7 @@ cat(d)` numberOfEdges: 29, numberOfCalls: 9, numberOfFunctionDefinitions: 0, - 
sizeOfObject: 3053 + sizeOfObject: 3197 }, statInfo); assert.strictEqual(stats.perSliceMeasurements.numberOfSlices, 3, `sliced three times ${statInfo}`); diff --git a/test/functionality/dataflow/environments/resolve-tests.ts b/test/functionality/dataflow/environments/resolve-tests.ts index 214116f2a8..5aa82df9b6 100644 --- a/test/functionality/dataflow/environments/resolve-tests.ts +++ b/test/functionality/dataflow/environments/resolve-tests.ts @@ -1,15 +1,16 @@ import { expect } from 'chai'; import { guard } from '../../../../src/util/assert'; -import { defaultEnv, variable } from '../../_helper/dataflow/environment-builder'; +import { asFunction, defaultEnv, variable } from '../../_helper/dataflow/environment-builder'; import { label } from '../../_helper/label'; import { resolveByName } from '../../../../src/dataflow/environments/resolve-by-name'; +import { ReferenceType } from '../../../../src/dataflow/environments/identifier'; describe('Resolve', () => { describe('ByName', () => { it(label('Locally without distracting elements', ['global-scope', 'lexicographic-scope'], ['other']), () => { const xVar = variable('x', '_1'); const env = defaultEnv().defineInEnv(xVar); - const result = resolveByName('x', env); + const result = resolveByName('x', env, ReferenceType.Unknown); guard(result !== undefined, 'there should be a result'); expect(result, 'there should be exactly one definition for x').to.have.length(1); expect(result[0], 'it should be x').to.deep.equal(xVar); @@ -19,10 +20,26 @@ describe('Resolve', () => { .defineVariable('x', '_2', '_1'); const xVar = variable('x', '_1'); env = env.defineInEnv(xVar); - const result = resolveByName('x', env); + const result = resolveByName('x', env, ReferenceType.Unknown); guard(result !== undefined, 'there should be a result'); expect(result, 'there should be exactly one definition for x').to.have.length(1); expect(result[0], 'it should be x').to.be.deep.equal(xVar); }); + describe('Resolve Function', () => { + 
it(label('Locally without distracting elements', ['global-scope', 'lexicographic-scope', 'search-type'], ['other']), () => { + const xVar = variable('foo', '_1'); + const env = defaultEnv().defineInEnv(xVar); + const result = resolveByName('foo', env, ReferenceType.Function); + expect(result, 'there should be no result').to.be.undefined; + }); + }); + describe('Resolve Variable', () => { + it(label('Locally without distracting elements', ['global-scope', 'lexicographic-scope', 'search-type'], ['other']), () => { + const xVar = asFunction('foo', '_1'); + const env = defaultEnv().defineInEnv(xVar); + const result = resolveByName('foo', env, ReferenceType.Variable); + expect(result, 'there should be no result').to.be.undefined; + }); + }); }); }); diff --git a/test/functionality/dataflow/processing-of-elements/atomic/dataflow-atomic-tests.ts b/test/functionality/dataflow/processing-of-elements/atomic/dataflow-atomic-tests.ts index ef3ca58b3c..780657f98d 100644 --- a/test/functionality/dataflow/processing-of-elements/atomic/dataflow-atomic-tests.ts +++ b/test/functionality/dataflow/processing-of-elements/atomic/dataflow-atomic-tests.ts @@ -16,6 +16,7 @@ import { OperatorDatabase } from '../../../../../src/r-bridge/lang-4.x/ast/model import type { FunctionArgument } from '../../../../../src/dataflow/graph/graph'; import { EmptyArgument } from '../../../../../src/r-bridge/lang-4.x/ast/model/nodes/r-function-call'; import { UnnamedFunctionCallPrefix } from '../../../../../src/dataflow/internal/process/functions/call/unnamed-call-handling'; +import { ReferenceType } from '../../../../../src/dataflow/environments/identifier'; describe('Atomic (dataflow information)', withShell(shell => { describe('Uninteresting Leafs', () => { @@ -704,7 +705,7 @@ describe('Atomic (dataflow information)', withShell(shell => { entryPoint: '5', environment: defaultEnv().pushEnv(), graph: new Set(['3']), - in: [{ nodeId: '3', name: undefined, controlDependencies: [] }], + in: [{ nodeId: '3', 
name: undefined, controlDependencies: [], type: ReferenceType.Argument }], out: [], unknownReferences: [] }) @@ -721,7 +722,7 @@ describe('Atomic (dataflow information)', withShell(shell => { entryPoint: '0', environment: defaultEnv().pushEnv(), graph: new Set(['1']), - in: [{ nodeId: '9', name: 'get("a")', controlDependencies: [] }], + in: [{ nodeId: '9', name: 'get("a")', controlDependencies: [], type: ReferenceType.Argument }], out: [], unknownReferences: [] }) diff --git a/test/functionality/dataflow/processing-of-elements/atomic/dataflow-redefine-tests.ts b/test/functionality/dataflow/processing-of-elements/atomic/dataflow-redefine-tests.ts index e953dd9252..208aa14cc2 100644 --- a/test/functionality/dataflow/processing-of-elements/atomic/dataflow-redefine-tests.ts +++ b/test/functionality/dataflow/processing-of-elements/atomic/dataflow-redefine-tests.ts @@ -5,6 +5,7 @@ import { OperatorDatabase } from '../../../../../src/r-bridge/lang-4.x/ast/model import { argumentInCall, defaultEnv } from '../../../_helper/dataflow/environment-builder'; import { EmptyArgument } from '../../../../../src/r-bridge/lang-4.x/ast/model/nodes/r-function-call'; import { BuiltIn } from '../../../../../src/dataflow/environments/built-in'; +import { ReferenceType } from '../../../../../src/dataflow/environments/identifier'; describe('Redefining builtins', withShell(shell => { assertDataflow(label('if (print)', ['name-escaped', 'formals-dot-dot-dot', 'implicit-return', 'numbers', 'unnamed-arguments', ...OperatorDatabase['<-'].capabilities, 'newlines']), @@ -23,7 +24,7 @@ if(1) .constant('3', undefined, false) .defineFunction('5', ['3'], { out: [], - in: [{ nodeId: '3', name: undefined, controlDependencies: [] }], + in: [{ nodeId: '3', name: undefined, controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '3', graph: new Set(['1', '3']), @@ -56,7 +57,7 @@ print(x)`, emptyGraph() .constant('3', undefined, false) .defineFunction('5', ['3'], { out: 
[], - in: [{ nodeId: '3', name: undefined, controlDependencies: [] }], + in: [{ nodeId: '3', name: undefined, controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '3', graph: new Set(['1', '3']), diff --git a/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-if-then-tests.ts b/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-if-then-tests.ts index a818e87745..f27bdf9550 100644 --- a/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-if-then-tests.ts +++ b/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-if-then-tests.ts @@ -6,6 +6,7 @@ import { BuiltIn } from '../../../../../src/dataflow/environments/built-in'; import { OperatorDatabase } from '../../../../../src/r-bridge/lang-4.x/ast/model/operators'; import { EmptyArgument } from '../../../../../src/r-bridge/lang-4.x/ast/model/nodes/r-function-call'; import { MIN_VERSION_LAMBDA } from '../../../../../src/r-bridge/lang-4.x/ast/model/versions'; +import { ReferenceType } from '../../../../../src/dataflow/environments/identifier'; describe('Lists with if-then constructs', withShell(shell => { for(const assign of ['<-', '<<-', '=']) { @@ -276,7 +277,7 @@ a()`, emptyGraph() .constant('1', undefined, false) .defineFunction('3', ['1'], { out: [], - in: [{ nodeId: '1', name: undefined, controlDependencies: [] }], + in: [{ nodeId: '1', name: undefined, controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '1', graph: new Set(['1']), @@ -286,7 +287,7 @@ a()`, emptyGraph() .constant('11', undefined, false) .defineFunction('13', ['11'], { out: [], - in: [{ nodeId: '11', name: undefined, controlDependencies: [] }], + in: [{ nodeId: '11', name: undefined, controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '11', graph: new Set(['11']), @@ -335,7 +336,7 @@ f()`, emptyGraph() .defineVariable('0', 'a', { 
definedBy: ['1', '2'] }) .defineFunction('11', ['10'], { out: [], - in: [{ nodeId: '7', name: 'a', controlDependencies: [] }], + in: [{ nodeId: '7', name: 'a', controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '10', graph: new Set(['7', '9', '10']), diff --git a/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-read-tests.ts b/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-read-tests.ts index 212561f6b2..c39699e683 100644 --- a/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-read-tests.ts +++ b/test/functionality/dataflow/processing-of-elements/expression-lists/dataflow-read-tests.ts @@ -4,6 +4,7 @@ import { argumentInCall, defaultEnv } from '../../../_helper/dataflow/environmen import { label } from '../../../_helper/label'; import { OperatorDatabase } from '../../../../../src/r-bridge/lang-4.x/ast/model/operators'; import { BuiltIn } from '../../../../../src/dataflow/environments/built-in'; +import { ReferenceType } from '../../../../../src/dataflow/environments/identifier'; describe('Lists with variable references', withShell(shell => { describe('read-read same variable', () => { @@ -127,7 +128,7 @@ print(x)`, emptyGraph() .constant('3', undefined, false) .defineFunction('5', ['3'], { out: [], - in: [{ nodeId: '3', name: undefined, controlDependencies: [] }], + in: [{ nodeId: '3', name: undefined, controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '3', graph: new Set(['1', '3']), diff --git a/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-call-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-call-tests.ts index ece7c49c37..e712ab37cc 100644 --- a/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-call-tests.ts +++ 
b/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-call-tests.ts @@ -10,6 +10,7 @@ import { OperatorDatabase } from '../../../../../src/r-bridge/lang-4.x/ast/model import { BuiltIn } from '../../../../../src/dataflow/environments/built-in'; import { EmptyArgument } from '../../../../../src/r-bridge/lang-4.x/ast/model/nodes/r-function-call'; import type { SupportedFlowrCapabilityId } from '../../../../../src/r-bridge/data/get'; +import { ReferenceType } from '../../../../../src/dataflow/environments/identifier'; describe('Function Call', withShell(shell => { describe('Calling previously defined functions', () => { @@ -141,7 +142,7 @@ a(i)`, emptyGraph() .constant('14', undefined, false) .defineFunction('16', ['15'], { out: [], - in: [{ nodeId: '14', name: undefined, controlDependencies: [] }], + in: [{ nodeId: '14', name: undefined, controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '15', graph: new Set(['4', '9', '8', '10', '12', '11', '13', '14', '15']), @@ -214,7 +215,7 @@ a()()`, emptyGraph() .constant('5', undefined, false) .defineFunction('7', ['6'], { out: [], - in: [{ nodeId: '5', name: undefined, controlDependencies: [] }], + in: [{ nodeId: '5', name: undefined, controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '6', graph: new Set(['5', '6']), @@ -255,7 +256,7 @@ a()()`, emptyGraph() .constant('3', undefined, false) .defineFunction('5', ['4'], { out: [], - in: [{ nodeId: '3', name: undefined, controlDependencies: [] }], + in: [{ nodeId: '3', name: undefined, controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '4', graph: new Set(['3', '4']), @@ -304,7 +305,7 @@ a()()`, emptyGraph() .calls('11', '5') .defineFunction('5', ['4'], { out: [], - in: [{ nodeId: '3', name: 'y', controlDependencies: [] }], + in: [{ nodeId: '3', name: 'y', controlDependencies: [], type: ReferenceType.Argument }], 
unknownReferences: [], entryPoint: '4', graph: new Set(['3', '4']), diff --git a/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-definition-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-definition-tests.ts index 6c912f8145..10f2d8d5c7 100644 --- a/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-definition-tests.ts +++ b/test/functionality/dataflow/processing-of-elements/functions/dataflow-function-definition-tests.ts @@ -7,6 +7,7 @@ import { OperatorDatabase } from '../../../../../src/r-bridge/lang-4.x/ast/model import { EmptyArgument } from '../../../../../src/r-bridge/lang-4.x/ast/model/nodes/r-function-call'; import { UnnamedFunctionCallPrefix } from '../../../../../src/dataflow/internal/process/functions/call/unnamed-call-handling'; import { MIN_VERSION_LAMBDA } from '../../../../../src/r-bridge/lang-4.x/ast/model/versions'; +import { ReferenceType } from '../../../../../src/dataflow/environments/identifier'; describe('Function Definition', withShell(shell => { describe('Only functions', () => { @@ -17,7 +18,7 @@ describe('Function Definition', withShell(shell => { .call('3', '{', [argumentInCall('2')], { returns: ['2'], reads: [BuiltIn], environment: defaultEnv().pushEnv() }, false) .defineFunction('4', ['3'], { out: [], - in: [{ nodeId: '2', name: 'x', controlDependencies: [] }], + in: [{ nodeId: '2', name: 'x', controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '3', graph: new Set(['2', '3']), @@ -117,7 +118,7 @@ describe('Function Definition', withShell(shell => { .defineVariable('0', 'x', { definedBy: ['1', '2'] }) .defineFunction('7', ['6'], { out: [], - in: [{ nodeId: '5', name: 'x', controlDependencies: [] }], + in: [{ nodeId: '5', name: 'x', controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '6', graph: new Set(['5', '6']), @@ -236,7 +237,7 @@ describe('Function 
Definition', withShell(shell => { .defineVariable('5', 'x', { definedBy: ['6', '7'] }, false) .defineFunction('10', ['9'], { out: [], - in: [{ nodeId: '6', name: 'x', controlDependencies: [] }], + in: [{ nodeId: '6', name: 'x', controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '9', graph: new Set(['6', '5', '7', '8', '9']), @@ -274,7 +275,7 @@ describe('Function Definition', withShell(shell => { .defineVariable('0', '...', { definedBy: [] }, false) .defineFunction('6', ['5'], { out: [], - in: [{ nodeId: '4', name: '..11', controlDependencies: [] }], + in: [{ nodeId: '4', name: '..11', controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '5', graph: new Set(['0', '4', '5']), @@ -344,7 +345,7 @@ describe('Function Definition', withShell(shell => { .defineVariable('2', '...', { definedBy: [] }, false) .defineFunction('11', ['10'], { out: [], - in: [{ nodeId: '9', name: 'foo', controlDependencies: [] }], + in: [{ nodeId: '9', name: 'foo', controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '10', graph: new Set(['0', '2', '7', '9', '10']), @@ -391,7 +392,7 @@ describe('Function Definition', withShell(shell => { .defineVariable('19', 'y', { definedBy: ['20', '21'], controlDependencies: [] }, false) .defineFunction('24', ['23'], { out: [], - in: [{ nodeId: '12', name: 'z', controlDependencies: [] }], + in: [{ nodeId: '12', name: 'z', controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '23', graph: new Set(['7', '2', '8', '10', '9', '11', '12', '14', '16', '18', '20', '19', '21', '22', '23']), @@ -406,7 +407,7 @@ describe('Function Definition', withShell(shell => { .call('7', '<-', [argumentInCall('5'), argumentInCall('6')], { returns: ['5'], reads: [BuiltIn] }) .defineFunction('4', ['3'], { out: [], - in: [{ nodeId: '2', name: 'x', controlDependencies: [] }], + in: [{ nodeId: '2', name: 'x', 
controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '3', graph: new Set(['2', '3']), @@ -434,7 +435,7 @@ describe('Function Definition', withShell(shell => { .defineVariable('8', 'x', { definedBy: ['9', '10'] }, false) .defineFunction('12', ['11'], { out: [], - in: [{ nodeId: '9', name: 'b', controlDependencies: [] }], + in: [{ nodeId: '9', name: 'b', controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '11', graph: new Set(['4', '9', '8', '10', '11']), @@ -559,7 +560,7 @@ print(g())`, emptyGraph() }, { environment: defaultEnv().pushEnv() }, false) .defineFunction('26', ['25'], { out: [], - in: [{ nodeId: '24', name: `${UnnamedFunctionCallPrefix}24`, controlDependencies: [] }], + in: [{ nodeId: '24', name: `${UnnamedFunctionCallPrefix}24`, controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '25', graph: new Set(['1', '2', '22', '23', '24', '25']), @@ -612,7 +613,7 @@ print(x)`, emptyGraph() .sideEffectOnCall('5', '21') .defineFunction('12', ['11'], { out: [], - in: [{ nodeId: '6', name: 'x', controlDependencies: [] }], + in: [{ nodeId: '6', name: 'x', controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '11', graph: new Set(['6', '7', '8', '5', '9', '10', '11']), @@ -720,7 +721,7 @@ f(5)`, emptyGraph() .constant('17', undefined, false) .defineFunction('30', ['29'], { out: [], - in: [{ nodeId: '10', name: 'k', controlDependencies: [] }], + in: [{ nodeId: '10', name: 'k', controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: '29', graph: new Set(['1', '6', '7', '8', '5', '9', '10', '12', '14', '17', '19', '21', '29']), diff --git a/test/functionality/dataflow/processing-of-elements/functions/dataflow-source-tests.ts b/test/functionality/dataflow/processing-of-elements/functions/dataflow-source-tests.ts index 7763863135..ee3decd974 100644 --- 
a/test/functionality/dataflow/processing-of-elements/functions/dataflow-source-tests.ts +++ b/test/functionality/dataflow/processing-of-elements/functions/dataflow-source-tests.ts @@ -7,6 +7,7 @@ import { requestProviderFromFile, requestProviderFromText } from '../../../../.. import { OperatorDatabase } from '../../../../../src/r-bridge/lang-4.x/ast/model/operators'; import { BuiltIn } from '../../../../../src/dataflow/environments/built-in'; import { EmptyArgument } from '../../../../../src/r-bridge/lang-4.x/ast/model/nodes/r-function-call'; +import { ReferenceType } from '../../../../../src/dataflow/environments/identifier'; describe('source', withShell(shell => { const sources = { @@ -130,7 +131,7 @@ describe('source', withShell(shell => { .constant('closure1-1:1-1:6-3', undefined, false) .defineFunction('closure1-1:1-1:6-5', ['closure1-1:1-1:6-3'], { out: [], - in: [{ nodeId: 'closure1-1:1-1:6-3', name: undefined, controlDependencies: [] }], + in: [{ nodeId: 'closure1-1:1-1:6-3', name: undefined, controlDependencies: [], type: ReferenceType.Argument }], unknownReferences: [], entryPoint: 'closure1-1:1-1:6-3', graph: new Set(['closure1-1:1-1:6-3']), diff --git a/test/functionality/slicing/static-program-slices/calls-tests.ts b/test/functionality/slicing/static-program-slices/calls-tests.ts index e20174837e..18861b3632 100644 --- a/test/functionality/slicing/static-program-slices/calls-tests.ts +++ b/test/functionality/slicing/static-program-slices/calls-tests.ts @@ -528,6 +528,10 @@ y` /* the formatting here seems wild, why five spaces */, { expectedOutput: '[1] assertSliced(label('Switch with named arguments', ['switch', ...OperatorDatabase['<-'].capabilities, 'numbers', 'strings', 'named-arguments', 'unnamed-arguments', 'switch', 'function-calls' ]), shell, 'x <- switch("a", a=1, b=2, c=3)', ['1@x'], 'x <- switch("a", a=1, b=2, c=3)'); }); + describe('Separate Function Resolution', () => { + assertSliced(label('Separate function resolution', ['name-normal', 
'numbers', ...OperatorDatabase['<-'].capabilities, 'normal-definition', 'call-normal', 'newlines', 'search-type']), + shell, 'c <- 3\nc(1, 2, 3)', ['2@c'], 'c(1, 2, 3)'); + }); describe('Failures in Practice', () => { /* adapted from a complex pipe in practice */ describe('Nested Pipes', () => { From 58afe5ebd8a449520d53749b2946a64f08247304 Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Thu, 26 Sep 2024 06:51:38 +0200 Subject: [PATCH 38/41] feat-fix: resolve by type --- src/dataflow/environments/resolve-by-name.ts | 12 +++++------- .../call/built-in/built-in-expression-list.ts | 11 ++++++++--- test/functionality/util/quads-tests.ts | 1 + 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/dataflow/environments/resolve-by-name.ts b/src/dataflow/environments/resolve-by-name.ts index 79286b5aa3..a601647ef2 100644 --- a/src/dataflow/environments/resolve-by-name.ts +++ b/src/dataflow/environments/resolve-by-name.ts @@ -1,18 +1,19 @@ import type { IEnvironment, REnvironmentInformation } from './environment'; -import { BuiltInEnvironment } from './environment'; +import { BuiltInEnvironment } from './environment'; import { Ternary } from '../../util/logic'; import type { Identifier, IdentifierDefinition } from './identifier'; import { ReferenceType } from './identifier'; import { happensInEveryBranch } from '../info'; +/* TODO: use bitmasks */ const TargetTypePredicate = { [ReferenceType.Unknown]: () => true, - [ReferenceType.Argument]: () => true, - [ReferenceType.Parameter]: () => true, + [ReferenceType.Function]: t => t.type === ReferenceType.Function || t.type === ReferenceType.BuiltInFunction || t.type === ReferenceType.Unknown || t.type === ReferenceType.Argument || t.type === ReferenceType.Parameter, [ReferenceType.Variable]: t => t.type === ReferenceType.Variable || t.type === ReferenceType.Parameter || t.type === ReferenceType.Argument || t.type === ReferenceType.Unknown, - [ReferenceType.Function]: t => t.type === ReferenceType.Function || 
t.type === ReferenceType.BuiltInFunction || t.type === ReferenceType.Unknown, [ReferenceType.Constant]: t => t.type === ReferenceType.Constant || t.type === ReferenceType.BuiltInConstant || t.type === ReferenceType.Unknown, + [ReferenceType.Parameter]: () => true, + [ReferenceType.Argument]: () => true, [ReferenceType.BuiltInConstant]: t => t.type === ReferenceType.BuiltInConstant || t.type === ReferenceType.Unknown, [ReferenceType.BuiltInFunction]: t => t.type === ReferenceType.BuiltInFunction || t.type === ReferenceType.Unknown } as const satisfies Record boolean>; @@ -30,9 +31,6 @@ export function resolveByName(name: Identifier, environment: REnvironmentInforma let current: IEnvironment = environment.current; let definitions: IdentifierDefinition[] | undefined = undefined; const wantedType = TargetTypePredicate[target]; - if(name === 'c') { - console.trace('resolve', name, target); - } do{ const definition = current.memory.get(name); if(definition !== undefined) { diff --git a/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts b/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts index ccbff03782..694f5114ff 100644 --- a/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts +++ b/src/dataflow/internal/process/functions/call/built-in/built-in-expression-list.ts @@ -10,8 +10,13 @@ import { linkFunctionCalls } from '../../../../linker'; import { guard, isNotUndefined } from '../../../../../../util/assert'; import { unpackArgument } from '../argument/unpack-argument'; import { patchFunctionCall } from '../common'; -import type { IEnvironment, REnvironmentInformation } from '../../../../../environments/environment'; -import { makeAllMaybe } from '../../../../../environments/environment'; +import type { + IEnvironment, + REnvironmentInformation +} from '../../../../../environments/environment'; +import { + makeAllMaybe +} from '../../../../../environments/environment'; import type 
{ NodeId } from '../../../../../../r-bridge/lang-4.x/ast/model/processing/node-id'; import { DataflowGraph } from '../../../../../graph/graph'; import type { IdentifierReference } from '../../../../../environments/identifier'; @@ -28,7 +33,7 @@ import type { RSymbol } from '../../../../../../r-bridge/lang-4.x/ast/model/node import { dataflowLogger } from '../../../../../logger'; -const dotDotDotAccess = /\.\.\d+/; +const dotDotDotAccess = /^\.\.\d+$/; function linkReadNameToWriteIfPossible(read: IdentifierReference, environments: REnvironmentInformation, listEnvironments: Set, remainingRead: Map, nextGraph: DataflowGraph) { const readName = read.name && dotDotDotAccess.test(read.name) ? '...' : read.name; diff --git a/test/functionality/util/quads-tests.ts b/test/functionality/util/quads-tests.ts index 10ac4c57d0..54378f82f2 100644 --- a/test/functionality/util/quads-tests.ts +++ b/test/functionality/util/quads-tests.ts @@ -65,6 +65,7 @@ describe('Quads', withShell(shell => { <${idPrefix}2> <${domain}onlyBuiltin> "false"^^ <${context}> . <${idPrefix}2> <${domain}args> <${idPrefix}5> <${context}> . <${idPrefix}5> <${domain}nodeId> "1"^^ <${context}> . +<${idPrefix}5> <${domain}type> "5"^^ <${context}> . <${idPrefix}0> <${domain}edges> <${idPrefix}6> <${context}> . <${idPrefix}6> <${domain}from> "3"^^ <${context}> . <${idPrefix}6> <${domain}to> "1"^^ <${context}> . 
From 1f21dbd23892d6cab3054599c4ab8a9a21dfb998 Mon Sep 17 00:00:00 2001 From: EagleoutIce Date: Thu, 26 Sep 2024 15:33:26 +0200 Subject: [PATCH 39/41] refactor(references): first step to bitmasks --- src/dataflow/environments/identifier.ts | 23 ++++++++++++++------- test/functionality/benchmark/slicer.spec.ts | 4 ++-- test/functionality/util/quads-tests.ts | 2 +- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/src/dataflow/environments/identifier.ts b/src/dataflow/environments/identifier.ts index 21c8b803f8..b341996039 100644 --- a/src/dataflow/environments/identifier.ts +++ b/src/dataflow/environments/identifier.ts @@ -4,23 +4,30 @@ import type { ControlDependency } from '../info'; export type Identifier = string & { __brand?: 'identifier' } +/** + * Each reference only has exactly one reference type, stored as the respective number. + * However, wenn checking we may want to allow for one of several types, + * allowing the combination of the respective bitmasks. + * TODO: write checker functions and bit utility + * TODO: check size-of calculation, does it use the string form and not identify them as numeric? 
+ */ export const enum ReferenceType { /** The identifier type is unknown */ - Unknown, + Unknown = 1, /** The identifier is defined by a function (includes built-in function) */ - Function, + Function = 2, /** The identifier is defined by a variable (includes parameter and argument) */ - Variable, + Variable = 4, /** The identifier is defined by a constant (includes built-in constant) */ - Constant, + Constant = 8, /** The identifier is defined by a parameter (which we know nothing about at the moment) */ - Parameter, + Parameter = 16, /** The identifier is defined by an argument (which we know nothing about at the moment) */ - Argument, + Argument = 32, /** The identifier is defined by a built-in value/constant */ - BuiltInConstant, + BuiltInConstant = 64, /** The identifier is defined by a built-in function */ - BuiltInFunction + BuiltInFunction = 128 } export type InGraphReferenceType = Exclude diff --git a/test/functionality/benchmark/slicer.spec.ts b/test/functionality/benchmark/slicer.spec.ts index 5dcebdad0b..c6b8b36938 100644 --- a/test/functionality/benchmark/slicer.spec.ts +++ b/test/functionality/benchmark/slicer.spec.ts @@ -51,7 +51,7 @@ describe('Benchmark Slicer', () => { numberOfEdges: 4, // the defined-by edge and the arguments numberOfCalls: 1, // `<-` numberOfFunctionDefinitions: 0, // no definitions - sizeOfObject: 398 + sizeOfObject: 400 }, statInfo); assert.strictEqual(stats.perSliceMeasurements.numberOfSlices, 1, `sliced only once ${statInfo}`); @@ -118,7 +118,7 @@ cat(d)` numberOfEdges: 29, numberOfCalls: 9, numberOfFunctionDefinitions: 0, - sizeOfObject: 3197 + sizeOfObject: 3213 }, statInfo); assert.strictEqual(stats.perSliceMeasurements.numberOfSlices, 3, `sliced three times ${statInfo}`); diff --git a/test/functionality/util/quads-tests.ts b/test/functionality/util/quads-tests.ts index 54378f82f2..d5f7ca3545 100644 --- a/test/functionality/util/quads-tests.ts +++ b/test/functionality/util/quads-tests.ts @@ -65,7 +65,7 @@ 
describe('Quads', withShell(shell => { <${idPrefix}2> <${domain}onlyBuiltin> "false"^^ <${context}> . <${idPrefix}2> <${domain}args> <${idPrefix}5> <${context}> . <${idPrefix}5> <${domain}nodeId> "1"^^ <${context}> . -<${idPrefix}5> <${domain}type> "5"^^ <${context}> . +<${idPrefix}5> <${domain}type> "32"^^ <${context}> . <${idPrefix}0> <${domain}edges> <${idPrefix}6> <${context}> . <${idPrefix}6> <${domain}from> "3"^^ <${context}> . <${idPrefix}6> <${domain}to> "1"^^ <${context}> . From 615098ebbc17bb5f35f5c59a81c6904f14f6f53d Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 27 Sep 2024 08:39:54 +0200 Subject: [PATCH 40/41] refactor: switch to bitmask for reference type --- src/dataflow/environments/identifier.ts | 8 +++++++- src/dataflow/environments/resolve-by-name.ts | 19 ++++++++++++------- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/dataflow/environments/identifier.ts b/src/dataflow/environments/identifier.ts index b341996039..eb9b54b927 100644 --- a/src/dataflow/environments/identifier.ts +++ b/src/dataflow/environments/identifier.ts @@ -8,7 +8,6 @@ export type Identifier = string & { __brand?: 'identifier' } * Each reference only has exactly one reference type, stored as the respective number. * However, wenn checking we may want to allow for one of several types, * allowing the combination of the respective bitmasks. - * TODO: write checker functions and bit utility * TODO: check size-of calculation, does it use the string form and not identify them as numeric? */ export const enum ReferenceType { @@ -30,6 +29,13 @@ export const enum ReferenceType { BuiltInFunction = 128 } +/** + * Check if the reference types have an overlapping type! 
+ */ +export function isReferenceType(t: ReferenceType, target: ReferenceType): boolean { + return (t & target) !== 0; +} + export type InGraphReferenceType = Exclude /** diff --git a/src/dataflow/environments/resolve-by-name.ts b/src/dataflow/environments/resolve-by-name.ts index a601647ef2..a53a3cfe27 100644 --- a/src/dataflow/environments/resolve-by-name.ts +++ b/src/dataflow/environments/resolve-by-name.ts @@ -2,20 +2,25 @@ import type { IEnvironment, REnvironmentInformation } from './environment'; import { BuiltInEnvironment } from './environment'; import { Ternary } from '../../util/logic'; import type { Identifier, IdentifierDefinition } from './identifier'; -import { ReferenceType } from './identifier'; +import { isReferenceType , ReferenceType } from './identifier'; import { happensInEveryBranch } from '../info'; -/* TODO: use bitmasks */ +const FunctionTargetTypes = ReferenceType.Function | ReferenceType.BuiltInFunction | ReferenceType.Unknown | ReferenceType.Argument | ReferenceType.Parameter; +const VariableTargetTypes = ReferenceType.Variable | ReferenceType.Parameter | ReferenceType.Argument | ReferenceType.Unknown; +const ConstantTargetTypes = ReferenceType.Constant | ReferenceType.BuiltInConstant | ReferenceType.Unknown; +const BuiltInConstantTargetTypes = ReferenceType.BuiltInConstant | ReferenceType.Unknown; +const BuiltInFunctionTargetTypes = ReferenceType.BuiltInFunction | ReferenceType.Unknown; + const TargetTypePredicate = { [ReferenceType.Unknown]: () => true, - [ReferenceType.Function]: t => t.type === ReferenceType.Function || t.type === ReferenceType.BuiltInFunction || t.type === ReferenceType.Unknown || t.type === ReferenceType.Argument || t.type === ReferenceType.Parameter, - [ReferenceType.Variable]: t => t.type === ReferenceType.Variable || t.type === ReferenceType.Parameter || t.type === ReferenceType.Argument || t.type === ReferenceType.Unknown, - [ReferenceType.Constant]: t => t.type === ReferenceType.Constant || t.type === 
ReferenceType.BuiltInConstant || t.type === ReferenceType.Unknown, + [ReferenceType.Function]: ({ type }: IdentifierDefinition) => isReferenceType(type, FunctionTargetTypes), + [ReferenceType.Variable]: ({ type }: IdentifierDefinition) => isReferenceType(type, VariableTargetTypes), + [ReferenceType.Constant]: ({ type }: IdentifierDefinition) => isReferenceType(type, ConstantTargetTypes), [ReferenceType.Parameter]: () => true, [ReferenceType.Argument]: () => true, - [ReferenceType.BuiltInConstant]: t => t.type === ReferenceType.BuiltInConstant || t.type === ReferenceType.Unknown, - [ReferenceType.BuiltInFunction]: t => t.type === ReferenceType.BuiltInFunction || t.type === ReferenceType.Unknown + [ReferenceType.BuiltInConstant]: ({ type }: IdentifierDefinition) => isReferenceType(type, BuiltInConstantTargetTypes), + [ReferenceType.BuiltInFunction]: ({ type }: IdentifierDefinition) => isReferenceType(type, BuiltInFunctionTargetTypes) } as const satisfies Record<ReferenceType, (t: IdentifierDefinition) => boolean>; /** From a708bd6650858ef46c522cd05e2a5cc1eaa6a1e2 Mon Sep 17 00:00:00 2001 From: Florian Sihler Date: Fri, 27 Sep 2024 08:53:35 +0200 Subject: [PATCH 41/41] refactor: fix sizeof of uncompacted objects --- src/benchmark/stats/size-of.ts | 8 +++++--- src/dataflow/environments/identifier.ts | 1 - test/functionality/benchmark/slicer.spec.ts | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/benchmark/stats/size-of.ts b/src/benchmark/stats/size-of.ts index 54892a35ef..fae13e26ce 100644 --- a/src/benchmark/stats/size-of.ts +++ b/src/benchmark/stats/size-of.ts @@ -6,6 +6,7 @@ import { VertexType } from '../../dataflow/graph/vertex'; import type { Identifier, IdentifierDefinition } from '../../dataflow/environments/identifier'; import { ReferenceType } from '../../dataflow/environments/identifier'; import sizeof from 'object-sizeof'; +import { compactRecord } from '../../util/objects'; /* we have to kill all processors linked in the default environment as they cannot be serialized and
they are shared anyway */ function killBuiltInEnv(env: IEnvironment | undefined): IEnvironment { @@ -37,6 +38,7 @@ export function getSizeOfDfGraph(df: DataflowGraph): number { const verts = []; for(const [, v] of df.vertices(true)) { let vertex: DataflowGraphVertexInfo = v; + if(vertex.environment) { vertex = { ...vertex, @@ -60,11 +62,11 @@ export function getSizeOfDfGraph(df: DataflowGraph): number { } as DataflowGraphVertexInfo; } - vertex = { + vertex = compactRecord({ ...vertex, /* shared anyway by using constants */ - tag: 0 as unknown - } as DataflowGraphVertexInfo; + tag: undefined + }) as DataflowGraphVertexInfo; verts.push(vertex); } diff --git a/src/dataflow/environments/identifier.ts b/src/dataflow/environments/identifier.ts index eb9b54b927..36d86d663c 100644 --- a/src/dataflow/environments/identifier.ts +++ b/src/dataflow/environments/identifier.ts @@ -8,7 +8,6 @@ export type Identifier = string & { __brand?: 'identifier' } * Each reference only has exactly one reference type, stored as the respective number. * However, wenn checking we may want to allow for one of several types, * allowing the combination of the respective bitmasks. - * TODO: check size-of calculation, does it use the string form and not identify them as numeric? 
*/ export const enum ReferenceType { /** The identifier type is unknown */ diff --git a/test/functionality/benchmark/slicer.spec.ts b/test/functionality/benchmark/slicer.spec.ts index c6b8b36938..f6f84ca008 100644 --- a/test/functionality/benchmark/slicer.spec.ts +++ b/test/functionality/benchmark/slicer.spec.ts @@ -51,7 +51,7 @@ describe('Benchmark Slicer', () => { numberOfEdges: 4, // the defined-by edge and the arguments numberOfCalls: 1, // `<-` numberOfFunctionDefinitions: 0, // no definitions - sizeOfObject: 400 + sizeOfObject: 196 }, statInfo); assert.strictEqual(stats.perSliceMeasurements.numberOfSlices, 1, `sliced only once ${statInfo}`); @@ -118,7 +118,7 @@ cat(d)` numberOfEdges: 29, numberOfCalls: 9, numberOfFunctionDefinitions: 0, - sizeOfObject: 3213 + sizeOfObject: 1649 }, statInfo); assert.strictEqual(stats.perSliceMeasurements.numberOfSlices, 3, `sliced three times ${statInfo}`);