feat: add text matches (#49)

moderneinc · Jul 12, 2024 · a1f9524 · a1f9524
1 parent c054113
commit a1f9524
Show file tree

Hide file tree

Showing 3 changed files with 127 additions and 0 deletions.
diff --git a/moderne_visualizations_misc/images/text_matches_tree_grid.300.png b/moderne_visualizations_misc/images/text_matches_tree_grid.300.png
diff --git a/moderne_visualizations_misc/specs/text_matches_tree_grid.yml b/moderne_visualizations_misc/specs/text_matches_tree_grid.yml
@@ -0,0 +1,20 @@
+---
+type: specs.moderne.io/v1beta/visualization
+name: io.moderne.TextMatchesTreeGrid
+displayName: Text matches
+description: >
+  Unique list of text matches extracted from the matched lines with a regex. Expand a match row to see a list of repositories containing that match.
+recipe: '*'
+dataTable: org.openrewrite.table.TextMatches
+options:
+  - sub_string_regex:
+      displayName: Regex to match string
+      description: >
+        The matches column of the data table is the line of text containing a match, with a ~~> marker pointing to where the match begins.
+        To find the unique match on each line, a regex is used to extract the match. This regex is typically the one used in the recipe run,
+        prefixed with ~~> and with a capture group around the part to display (for example ~~>secrets\.(\w*)).
+      required: true
+  - matches_column_header:
+      displayName: Matches column header
+      description: >
+        The name of the column header for matches (default: Matches)
+      required: false
diff --git a/moderne_visualizations_misc/text_matches_tree_grid.ipynb b/moderne_visualizations_misc/text_matches_tree_grid.ipynb
@@ -0,0 +1,107 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": [
+     "parameters"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "sub_string_regex: str = \"~~>secrets\\.(\\w*)\"\n",
+    "matches_column_header: str = \"Matches\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from code_data_science import (\n",
+    "    data_table as dt,\n",
+    "    unique_dictionaries as ud,\n",
+    "    tree_data_grid,\n",
+    ")\n",
+    "\n",
+    "# Load the text matches data table (sample data: GitHub secrets in use)\n",
+    "df = dt.read_csv(\"../samples/github_secrets_in_use.csv\")\n",
+    "\n",
+    "# The match column contains the full line of code, but we only need the\n",
+    "# specific match, so we create a new column by extracting it with the\n",
+    "# sub_string_regex parameter (default: ~~>secrets\\.(\\w*)).\n",
+    "df[\"specific_match\"] = df[\"match\"].str.extract(sub_string_regex)\n",
+    "\n",
+    "# Group by 'specific_match' and count total occurrences\n",
+    "total_grouped = df.groupby(\"specific_match\").size().reset_index(name=\"total_count\")\n",
+    "\n",
+    "# Initialize unique dictionary tree\n",
+    "tree = ud.UniqueDictionaries()\n",
+    "\n",
+    "# Add total counts to the tree\n",
+    "for _, row in total_grouped.iterrows():\n",
+    "    specific_match_value = row[\"specific_match\"]\n",
+    "    total_count = row[\"total_count\"]\n",
+    "\n",
+    "    tree.add({\"path\": specific_match_value, \"count\": total_count})\n",
+    "\n",
+    "# Group by 'specific_match' and 'repositoryPath' to count repository-specific occurrences\n",
+    "repo_grouped = (\n",
+    "    df.groupby([\"specific_match\", \"repositoryPath\"])\n",
+    "    .size()\n",
+    "    .reset_index(name=\"repo_count\")\n",
+    ")\n",
+    "\n",
+    "# Add repository-specific counts to the tree\n",
+    "for _, row in repo_grouped.iterrows():\n",
+    "    specific_match_value = row[\"specific_match\"]\n",
+    "    repository_path = row[\"repositoryPath\"]\n",
+    "    repo_count = row[\"repo_count\"]\n",
+    "\n",
+    "    tree.add(\n",
+    "        {\"path\": f\"{specific_match_value}:::{repository_path}\", \"count\": repo_count}\n",
+    "    )\n",
+    "\n",
+    "tree_data = []\n",
+    "for item in tree.to_list():\n",
+    "    item[\"path\"] = item[\"path\"].split(\":::\")\n",
+    "    tree_data.append(item)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tree_data_grid.display(\n",
+    "    tree_data,\n",
+    "    matches_column_header,\n",
+    "    [{\"field\": \"count\", \"headerName\": \"Occurrences\", \"minWidth\": 200}],\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}