diff --git a/moderne_visualizations_misc/composite_recipe_results_sankey.ipynb b/moderne_visualizations_misc/composite_recipe_results_sankey.ipynb index 82299ea..6f87b8a 100644 --- a/moderne_visualizations_misc/composite_recipe_results_sankey.ipynb +++ b/moderne_visualizations_misc/composite_recipe_results_sankey.ipynb @@ -22,6 +22,7 @@ "source": [ "from code_data_science import data_table as dt\n", "import warnings\n", + "\n", "warnings.simplefilter(\"ignore\")\n", "\n", "df = dt.read_csv(\"../samples/composite_recipe_results_sankey.csv\")\n", @@ -41,18 +42,26 @@ "metadata": {}, "outputs": [], "source": [ - "df = df[['parentRecipe','recipe']].assign(count=lambda r: 1)\n", - "recipe_counts = df[['parentRecipe','recipe','count']].groupby(by=['parentRecipe','recipe'])['count'].count().sort_values(ascending=False).reset_index(name='count')\n", - "recipe_counts['parentRecipe'] = recipe_counts['parentRecipe'].transform(lambda s: s.split(\".\")[-1])\n", - "recipe_counts['recipe'] = recipe_counts['recipe'].transform(lambda s: s.split(\".\")[-1])\n", + "df = df[[\"parentRecipe\", \"recipe\"]].assign(count=lambda r: 1)\n", + "recipe_counts = (\n", + " df[[\"parentRecipe\", \"recipe\", \"count\"]]\n", + " .groupby(by=[\"parentRecipe\", \"recipe\"])[\"count\"]\n", + " .count()\n", + " .sort_values(ascending=False)\n", + " .reset_index(name=\"count\")\n", + ")\n", + "recipe_counts[\"parentRecipe\"] = recipe_counts[\"parentRecipe\"].transform(\n", + " lambda s: s.split(\".\")[-1]\n", + ")\n", + "recipe_counts[\"recipe\"] = recipe_counts[\"recipe\"].transform(lambda s: s.split(\".\")[-1])\n", "\n", "# sort recipe_counts by count\n", - "recipe_counts = recipe_counts.sort_values(by=['count'], ascending=False)\n", + "recipe_counts = recipe_counts.sort_values(by=[\"count\"], ascending=False)\n", "\n", - "count_threshold_int = int(count_threshold);\n", + "count_threshold_int = int(count_threshold)\n", "\n", - "if (count_threshold_int > 0):\n", - " recipe_counts = recipe_counts[recipe_counts['count'] > count_threshold_int]" + "if count_threshold_int > 0:\n", + " recipe_counts = recipe_counts[recipe_counts[\"count\"] > count_threshold_int]" ] }, { @@ -65,44 +74,51 @@ "import pandas as pd\n", "\n", "# Extract all unique nodes (parent and child recipes)\n", - "all_nodes = list(set(recipe_counts['parentRecipe']).union(set(recipe_counts['recipe'])))\n", + "all_nodes = list(set(recipe_counts[\"parentRecipe\"]).union(set(recipe_counts[\"recipe\"])))\n", "\n", "# Create node indices mapping for preparing for plotly\n", "node_indices = {node: i for i, node in enumerate(all_nodes)}\n", "\n", "# iterates over each row in recipe_counts and find the index of the 'parentRecipe' in node_indices and adds it to the source list.\n", - "source = [node_indices[row['parentRecipe']] for _, row in recipe_counts.iterrows()]\n", + "source = [node_indices[row[\"parentRecipe\"]] for _, row in recipe_counts.iterrows()]\n", "\n", "# iterates over each row in recipe_counts and find the index of the 'recipe' in node_indices and adds it to the target list.\n", - "target = [node_indices[row['recipe']] for _, row in recipe_counts.iterrows()]\n", + "target = [node_indices[row[\"recipe\"]] for _, row in recipe_counts.iterrows()]\n", "\n", "# convert the 'count' column to a list\n", - "value = list(recipe_counts['count'])\n", + "value = list(recipe_counts[\"count\"])\n", "\n", "# Format labels with node names and counts\n", + "\n", + "\n", "def getCount(node):\n", - " left = recipe_counts[recipe_counts['recipe'] == node]['count'].sum()\n", + " left = recipe_counts[recipe_counts[\"recipe\"] == node][\"count\"].sum()\n", " if left == 0:\n", - " return recipe_counts[recipe_counts['parentRecipe'] == node]['count'].sum()\n", + " return recipe_counts[recipe_counts[\"parentRecipe\"] == node][\"count\"].sum()\n", " else:\n", " return left\n", - " \n", + "\n", + "\n", "formatted_labels = [f\"{node} - {getCount(node)}\" for node in all_nodes]\n", "\n", "# Create the Sankey diagram\n", - "fig = go.Figure(data=[go.Sankey(\n", - " node=dict(\n", - " pad=15,\n", - " thickness=15,\n", - " line=dict(color=\"black\", width=0.5),\n", - " label=formatted_labels\n", - " ),\n", - " link=dict(\n", - " source=source,\n", - " target=target,\n", - " value=value,\n", - " \n", - " ))])\n", + "fig = go.Figure(\n", + " data=[\n", + " go.Sankey(\n", + " node=dict(\n", + " pad=15,\n", + " thickness=15,\n", + " line=dict(color=\"black\", width=0.5),\n", + " label=formatted_labels,\n", + " ),\n", + " link=dict(\n", + " source=source,\n", + " target=target,\n", + " value=value,\n", + " ),\n", + " )\n", + " ]\n", + ")\n", "\n", "# Update layout\n", "fig.update_layout(\n", @@ -110,10 +126,10 @@ " font_size=10,\n", " # height=max(len(all_nodes)*15, 400),\n", " width=800,\n", - " height=800\n", + " height=800,\n", ")\n", "\n", - "fig.show()\n" + "fig.show()" ] } ], diff --git a/moderne_visualizations_misc/recipe_performance.ipynb b/moderne_visualizations_misc/recipe_performance.ipynb index b3ccff3..91b6735 100644 --- a/moderne_visualizations_misc/recipe_performance.ipynb +++ b/moderne_visualizations_misc/recipe_performance.ipynb @@ -22,7 +22,6 @@ }, "outputs": [], "source": [ - "\n", "from code_data_science import data_table as dt\n", "\n", "df = dt.read_csv(\"../samples/recipe_performance.csv\")\n", @@ -72,7 +71,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "fig = px.bar(\n", " grouped,\n", " x=byTotalTime,\n", diff --git a/moderne_visualizations_misc/specs/composite_recipe_results_sankey.yml b/moderne_visualizations_misc/specs/composite_recipe_results_sankey.yml index 665136a..d91ffae 100644 --- a/moderne_visualizations_misc/specs/composite_recipe_results_sankey.yml +++ b/moderne_visualizations_misc/specs/composite_recipe_results_sankey.yml @@ -1,7 +1,7 @@ --- type: specs.moderne.io/v1beta/visualization name: io.moderne.CompositeRecipeResultsSankey -displayName: Composite Recipe Results +displayName: Composite recipe results description: > The number of source files affected by each recipe. Recipes can be composed in hierarchical forms. The representation below is a Sankey diagram. \