[04b6IYYB] Fix sampling documentation for apoc.meta.* procs (#3442)

neo4j-contrib · Feb 6, 2023 · 2185551 · 2185551
1 parent d4d4fbd
commit 2185551
Show file tree

Hide file tree

Showing 13 changed files with 118 additions and 53 deletions.
diff --git a/docs/asciidoc/modules/ROOT/pages/overview/apoc.meta/apoc.meta.data.adoc b/docs/asciidoc/modules/ROOT/pages/overview/apoc.meta/apoc.meta.data.adoc
@@ -25,7 +25,14 @@ apoc.meta.data(config = {} :: MAP?) :: (label :: STRING?, property :: STRING?, c
 |===
 
 == Config parameters
-include::partial$usage/config/apoc.meta.data.adoc[]
+This procedure supports the following config parameters:
+
+.Config parameters
+[opts=header]
+|===
+| Name | Type | Default | Description
+| sample | Long | 1000 | Number of nodes to skip, e.g. a sample of 1000 will read every 1000th node.
+|===
 
 == Output parameters
 [.procedures, opts=header]

diff --git a/docs/asciidoc/modules/ROOT/pages/overview/apoc.meta/apoc.meta.data.of.adoc b/docs/asciidoc/modules/ROOT/pages/overview/apoc.meta/apoc.meta.data.of.adoc
@@ -25,8 +25,16 @@ apoc.meta.data.of(graph :: ANY?, config = {} :: MAP?) :: (label :: STRING?, prop
 |config|MAP?|{}
 |===
 
-== Config parameters
-include::partial$usage/config/apoc.meta.data.of.adoc[]
+== Config parameters
+This procedure supports the following config parameters:
+
+.Config parameters
+[opts=header]
+|===
+| Name | Type | Default | Description
+| sample | Long | 1000 | Number of nodes to skip, e.g. a sample of 1000 will read every 1000th node.
+|===
+
 
 == Output parameters
 [.procedures, opts=header]

diff --git a/docs/asciidoc/modules/ROOT/pages/overview/apoc.meta/apoc.meta.graph.adoc b/docs/asciidoc/modules/ROOT/pages/overview/apoc.meta/apoc.meta.graph.adoc
@@ -35,6 +35,10 @@ include::partial$usage/config/apoc.meta.graph.adoc[]
 |relationships|LIST? OF RELATIONSHIP?
 |===
 
+[[sampling-apoc.meta.graph]]
+== Sampling
+include::partial$usage/apoc.meta.samplingDesc.adoc[]
+
 [[usage-apoc.meta.graph]]
 == Usage Examples
 include::partial$usage/apoc.meta.graph.adoc[]

diff --git a/docs/asciidoc/modules/ROOT/pages/overview/apoc.meta/apoc.meta.graph.of.adoc b/docs/asciidoc/modules/ROOT/pages/overview/apoc.meta/apoc.meta.graph.of.adoc
@@ -26,7 +26,7 @@ apoc.meta.graph.of(graph = {} :: ANY?, config = {} :: MAP?) :: (nodes :: LIST? O
 |===
 
 == Config parameters
-include::partial$usage/config/apoc.meta.graph.of.adoc[]
+include::partial$usage/config/apoc.meta.graph.adoc[]
 
 == Output parameters
 [.procedures, opts=header]
@@ -36,6 +36,10 @@ include::partial$usage/config/apoc.meta.graph.of.adoc[]
 |relationships|LIST? OF RELATIONSHIP?
 |===
 
+[[sampling-apoc.meta.graph.of]]
+== Sampling
+include::partial$usage/apoc.meta.samplingDesc.adoc[]
+
 [[usage-apoc.meta.graph.of]]
 == Usage Examples
 include::partial$usage/apoc.meta.graph.of.adoc[]

diff --git a/docs/asciidoc/modules/ROOT/pages/overview/apoc.meta/apoc.meta.schema.adoc b/docs/asciidoc/modules/ROOT/pages/overview/apoc.meta/apoc.meta.schema.adoc
@@ -25,7 +25,14 @@ apoc.meta.schema(config = {} :: MAP?) :: (value :: MAP?)
 |===
 
 == Config parameters
-include::partial$usage/config/apoc.meta.schema.adoc[]
+This procedure supports the following config parameters:
+
+.Config parameters
+[opts=header]
+|===
+| Name | Type | Default | Description
+| sample | Long | 1000 | Number of nodes to skip, e.g. a sample of 1000 will read every 1000th node.
+|===
 
 == Output parameters
 [.procedures, opts=header]

diff --git a/docs/asciidoc/modules/ROOT/pages/overview/apoc.meta/apoc.meta.subGraph.adoc b/docs/asciidoc/modules/ROOT/pages/overview/apoc.meta/apoc.meta.subGraph.adoc
@@ -25,7 +25,7 @@ apoc.meta.subGraph(config :: MAP?) :: (nodes :: LIST? OF NODE?, relationships ::
 |===
 
 == Config parameters
-include::partial$usage/config/apoc.meta.subGraph.adoc[]
+include::partial$usage/config/apoc.meta.graph.adoc[]
 
 == Output parameters
 [.procedures, opts=header]
@@ -35,6 +35,10 @@ include::partial$usage/config/apoc.meta.subGraph.adoc[]
 |relationships|LIST? OF RELATIONSHIP?
 |===
 
+[[sampling-apoc.meta.subGraph]]
+== Sampling
+include::partial$usage/apoc.meta.samplingDesc.adoc[]
+
 [[usage-apoc.meta.subGraph]]
 == Usage Examples
 include::partial$usage/apoc.meta.subGraph.adoc[]

diff --git a/docs/asciidoc/modules/ROOT/partials/usage/apoc.meta.nodes.count.adoc b/docs/asciidoc/modules/ROOT/partials/usage/apoc.meta.nodes.count.adoc
@@ -22,11 +22,11 @@ RETURN apoc.meta.nodes.count(['MyCountLabel', 'ThirdLabel']) AS count;
 |===
 
 
-We can return all nodes with a label `MyCountLabel` and a relationship `MY_COUNT_REL` through the config param `rel`
+We can return all nodes with a label `MyCountLabel` and a relationship `MY_COUNT_REL` through the config param `includeRels`:
 
 [source,cypher]
 ----
-RETURN apoc.meta.nodes.count(['MyCountLabel'], {rels: ['MY_COUNT_REL']}) AS count;
+RETURN apoc.meta.nodes.count(['MyCountLabel'], {includeRels: ['MY_COUNT_REL']}) AS count;
 ----
 
 .Results
@@ -40,7 +40,7 @@ Moreover, we can return all nodes with a `outcome` relationship `MY_COUNT_REL` (
 
 [source,cypher]
 ----
-RETURN apoc.meta.nodes.count(['MyCountLabel'], {rels: ['MY_COUNT_REL>']}) AS count;
+RETURN apoc.meta.nodes.count(['MyCountLabel'], {includeRels: ['MY_COUNT_REL>']}) AS count;
 ----
 
 .Results
@@ -54,7 +54,7 @@ otherwise with an `incoming` relationship `MY_COUNT_REL` (with the suffix `<`):
 
 [source,cypher]
 ----
-RETURN apoc.meta.nodes.count(['MyCountLabel'], {rels: ['MY_COUNT_REL<']}) AS count;
+RETURN apoc.meta.nodes.count(['MyCountLabel'], {includeRels: ['MY_COUNT_REL<']}) AS count;
 ----
 
 .Results

diff --git a/docs/asciidoc/modules/ROOT/partials/usage/apoc.meta.samplingDesc.adoc b/docs/asciidoc/modules/ROOT/partials/usage/apoc.meta.samplingDesc.adoc
@@ -0,0 +1,18 @@
+This procedure works by using the database statistics. A new node is returned for each label, and its connecting
+relationships are calculated based on the pairing combinations of [:R]->(:N) and (:M)-[:R]. For example, for the graph
+(:A)-[:R]->(:B)-[:R]->(:C), the path (:B)-[:R]->(:B) will be calculated from the combination of [:R]->(:B) and (:B)-[:R].
+This procedure will post-process the data by default, removing all non-existing relationships.
+This is done by scanning the nodes and their relationships.
+If the relationship is not found, it is removed from the final result.
+This slows down the procedure, but will produce an accurate schema.
+
+See xref::overview/apoc.meta/apoc.meta.graphSample.adoc[apoc.meta.graphSample] to avoid performing any post-processing.
+
+It is also possible to specify how many nodes and relationships to scan. The config parameter `sample` gives the skip count,
+and the `maxRels` parameter gives the max number of relationships that will be checked per node.
+If `sample` is set to 100, this means that every 100th node will be checked per label,
+and a value of 100 for `maxRels` means that for each node read, only the first 100 relationships will be read.
+Note that if these values are set, and the relationship is not found within those constraints,
+it is assumed that the relationship does not exist, and this may result in false negatives.
+
+A `sample` value higher than the number of nodes for that label will result in one node being checked.
diff --git a/docs/asciidoc/modules/ROOT/partials/usage/config/apoc.meta.data.adoc b/docs/asciidoc/modules/ROOT/partials/usage/config/apoc.meta.data.adoc
@@ -1,10 +1,22 @@
-The procedure support the following config parameters:
+This procedure supports the following config parameters:
 
 .Config parameters
 [opts=header]
 |===
-| name | type | default | description
-| sample | Long | 1000 | number of nodes to sample per label. See "Sampling" section below.
+| Name | Type | Default | Description
+| includeLabels | List<String> | [] | Node labels to include. Default is to include all node labels.
+| includeRels | List<String> | [] | Relationship types to include. Default is to include all relationship types.
+| excludeLabels | List<String> | [] | Node labels to exclude. Default is to not exclude any node labels.
+| excludeRels | List<String> | [] | Relationship types to exclude. Default is to not exclude any relationship types.
+| sample | Long | 1000 | Number of nodes to skip, e.g. a sample of 1000 will read every 1000th node.
+| maxRels | Long | 100 | Number of relationships to read per sampled node.
 |===
 
-include::partial$usage/config/sample.config.adoc[]
+.Deprecated parameters
+[opts=header]
+|===
+| Name | Type | Default | Description
+| labels | List<String> | [] | Deprecated, use `includeLabels`.
+| rels | List<String> | [] | Deprecated, use `includeRels`.
+| excludes | List<String> | [] | Deprecated, use `excludeLabels`.
+|===
diff --git a/docs/asciidoc/modules/ROOT/partials/usage/config/apoc.meta.graph.adoc b/docs/asciidoc/modules/ROOT/partials/usage/config/apoc.meta.graph.adoc
@@ -1,11 +1,9 @@
-The procedure support the following config parameters:
+This procedure supports the following config parameters:
 
 .Config parameters
 [opts=header]
 |===
-| name | type | default | description
-| sample | Long | 1000 | number of nodes to sample per label. See "Sampling" section below.
-| maxRels | Long | 100 | number of relationships to be analyzed, by type of relationship and start and end label, in order to remove / add relationships incorrectly inserted / not inserted by the sample result.
-|===
-
-include::partial$usage/config/sample.config.adoc[]
+| Name | Type | Default | Description
+| sample | Long | 1 | Number of nodes to skip, e.g. a sample of 1000 will read every 1000th node. Defaults to reading every node.
+| maxRels | Long | -1 | Number of relationships to read per sampled node. A value of -1 will read all.
+|===
diff --git a/docs/asciidoc/modules/ROOT/partials/usage/config/apoc.meta.nodeTypeProperties.adoc b/docs/asciidoc/modules/ROOT/partials/usage/config/apoc.meta.nodeTypeProperties.adoc
@@ -1,24 +1,22 @@
-The procedure support the following config parameters:
+This procedure supports the following config parameters:
 
 .Config parameters
 [opts=header]
 |===
-| name | type | default | description
-| includeLabels | List<String> | [] | labels to include. Default is to include all labels
-| includeRels | List<String> | [] | relationship types to include. Default is to include all relationship types
-| excludeLabels | List<String> | [] | labels to exclude. Default is to not exclude any label
-| excludeRels | List<String> | [] | relationship types to exclude. Default is to not exclude any relationship type
-| sample | Long | 1000 | number of nodes to sample per label. See "Sampling" section below.
-| maxRels | Long | 100 | number of relationships to sample per relationship type
+| Name | Type | Default | Description
+| includeLabels | List<String> | [] | Node labels to include. Default is to include all node labels.
+| includeRels | List<String> | [] | Relationship types to include. Default is to include all relationship types.
+| excludeLabels | List<String> | [] | Node labels to exclude. Default is to not exclude any node labels.
+| excludeRels | List<String> | [] | Relationship types to exclude. Default is to not exclude any relationship types.
+| sample | Long | 1000 | Number of nodes to skip, e.g. a sample of 1000 will read every 1000th node.
+| maxRels | Long | 100 | Number of relationships to read per sampled node.
 |===
 
-include::partial$usage/config/sample.config.adoc[]
-
 .Deprecated parameters
 [opts=header]
 |===
-| name | type | default | description
-| labels | List<String> | [] | deprecated, use `includeLabels`
-| rels | List<String> | [] | deprecated, use `includeRels`
-| excludes | List<String> | [] | deprecated, use `excludeLabels`
+| Name | Type | Default | Description
+| labels | List<String> | [] | Deprecated, use `includeLabels`.
+| rels | List<String> | [] | Deprecated, use `includeRels`.
+| excludes | List<String> | [] | Deprecated, use `excludeLabels`.
 |===
diff --git a/docs/asciidoc/modules/ROOT/partials/usage/config/apoc.meta.nodes.count.adoc b/docs/asciidoc/modules/ROOT/partials/usage/config/apoc.meta.nodes.count.adoc
@@ -1,9 +1,16 @@
-The procedure support the following config parameters:
+This procedure supports the following config parameters:
 
 .Config parameters
 [opts=header, cols="1,1,1,5"]
 |===
-| name | type | default | description
-| rels | Set<String> | `EmptySet` | The rel types to consider in the count. 
-    We can add to the suffix `>` or `<` to the rel type name to indicate an outgoing or incoming relationship.
+| Name | Type | Default | Description
+| includeRels | List<String> | [] | Relationship types to include. Default is to include all relationship types.
+Add the suffix `>` or `<` to the relationship type name to indicate an outgoing or incoming relationship.
+|===
+
+.Deprecated parameters
+[opts=header]
+|===
+| Name | Type | Default | Description
+| rels | List<String> | [] | Deprecated, use `includeRels`.
 |===
diff --git a/docs/asciidoc/modules/ROOT/partials/usage/config/apoc.meta.relTypeProperties.adoc b/docs/asciidoc/modules/ROOT/partials/usage/config/apoc.meta.relTypeProperties.adoc
@@ -1,24 +1,22 @@
-The procedure support the following config parameters:
+This procedure supports the following config parameters:
 
 .Config parameters
 [opts=header]
 |===
-| name | type | default | description
-| includeLabels | List<String> | [] | labels to include. Default is to include all labels
-| includeRels | List<String> | [] | relationship types to include. Default is to include all relationship types
-| excludeLabels | List<String> | [] | labels to exclude. Default is to not exclude any label
-| excludeRels | List<String> | [] | relationship types to exclude. Default is to not exclude any relationship type
-| sample | Long | 1000 | number of nodes to sample per label. See "Sampling" section below.
-| maxRels | Long | 100 | number of relationships to sample per relationship type
+| Name | Type | Default | Description
+| includeLabels | List<String> | [] | Node labels to include. Default is to include all node labels.
+| includeRels | List<String> | [] | Relationship types to include. Default is to include all relationship types.
+| excludeLabels | List<String> | [] | Node labels to exclude. Default is to not exclude any node labels.
+| excludeRels | List<String> | [] | Relationship types to exclude. Default is to not exclude any relationship types.
+| sample | Long | 1000 | Number of nodes to skip, e.g. a sample of 1000 will read every 1000th node.
+| maxRels | Long | 100 | Number of relationships to read per sampled node.
 |===
 
-include::partial$usage/config/sample.config.adoc[]
-
 .Deprecated parameters
 [opts=header]
 |===
-| name | type | default | description
-| labels | List<String> | [] | deprecated, use `includeLabels`
-| rels | List<String> | [] | deprecated, use `includeRels`
-| excludes | List<String> | [] | deprecated, use `excludeLabels`
+| Name | Type | Default | Description
+| labels | List<String> | [] | Deprecated, use `includeLabels`.
+| rels | List<String> | [] | Deprecated, use `includeRels`.
+| excludes | List<String> | [] | Deprecated, use `excludeLabels`.
 |===