Skip to content

Commit

Permalink
chore: fix formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
zieka committed Nov 18, 2023
1 parent c5f9ec7 commit e00bdcf
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 33 deletions.
76 changes: 46 additions & 30 deletions moderne_visualizations_misc/composite_recipe_results_sankey.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"source": [
"from code_data_science import data_table as dt\n",
"import warnings\n",
"\n",
"warnings.simplefilter(\"ignore\")\n",
"\n",
"df = dt.read_csv(\"../samples/composite_recipe_results_sankey.csv\")\n",
Expand All @@ -41,18 +42,26 @@
"metadata": {},
"outputs": [],
"source": [
"df = df[['parentRecipe','recipe']].assign(count=lambda r: 1)\n",
"recipe_counts = df[['parentRecipe','recipe','count']].groupby(by=['parentRecipe','recipe'])['count'].count().sort_values(ascending=False).reset_index(name='count')\n",
"recipe_counts['parentRecipe'] = recipe_counts['parentRecipe'].transform(lambda s: s.split(\".\")[-1])\n",
"recipe_counts['recipe'] = recipe_counts['recipe'].transform(lambda s: s.split(\".\")[-1])\n",
"df = df[[\"parentRecipe\", \"recipe\"]].assign(count=lambda r: 1)\n",
"recipe_counts = (\n",
" df[[\"parentRecipe\", \"recipe\", \"count\"]]\n",
" .groupby(by=[\"parentRecipe\", \"recipe\"])[\"count\"]\n",
" .count()\n",
" .sort_values(ascending=False)\n",
" .reset_index(name=\"count\")\n",
")\n",
"recipe_counts[\"parentRecipe\"] = recipe_counts[\"parentRecipe\"].transform(\n",
" lambda s: s.split(\".\")[-1]\n",
")\n",
"recipe_counts[\"recipe\"] = recipe_counts[\"recipe\"].transform(lambda s: s.split(\".\")[-1])\n",
"\n",
"# sort recipe_counts by count\n",
"recipe_counts = recipe_counts.sort_values(by=['count'], ascending=False)\n",
"recipe_counts = recipe_counts.sort_values(by=[\"count\"], ascending=False)\n",
"\n",
"count_threshold_int = int(count_threshold);\n",
"count_threshold_int = int(count_threshold)\n",
"\n",
"if (count_threshold_int > 0):\n",
" recipe_counts = recipe_counts[recipe_counts['count'] > count_threshold_int]"
"if count_threshold_int > 0:\n",
" recipe_counts = recipe_counts[recipe_counts[\"count\"] > count_threshold_int]"
]
},
{
Expand All @@ -65,55 +74,62 @@
"import pandas as pd\n",
"\n",
"# Extract all unique nodes (parent and child recipes)\n",
"all_nodes = list(set(recipe_counts['parentRecipe']).union(set(recipe_counts['recipe'])))\n",
"all_nodes = list(set(recipe_counts[\"parentRecipe\"]).union(set(recipe_counts[\"recipe\"])))\n",
"\n",
"# Create node indices mapping for preparing for plotly\n",
"node_indices = {node: i for i, node in enumerate(all_nodes)}\n",
"\n",
"# Iterate over each row in recipe_counts, look up the index of its 'parentRecipe' in node_indices, and add that index to the source list.\n",
"source = [node_indices[row['parentRecipe']] for _, row in recipe_counts.iterrows()]\n",
"source = [node_indices[row[\"parentRecipe\"]] for _, row in recipe_counts.iterrows()]\n",
"\n",
"# Iterate over each row in recipe_counts, look up the index of its 'recipe' in node_indices, and add that index to the target list.\n",
"target = [node_indices[row['recipe']] for _, row in recipe_counts.iterrows()]\n",
"target = [node_indices[row[\"recipe\"]] for _, row in recipe_counts.iterrows()]\n",
"\n",
"# convert the 'count' column to a list\n",
"value = list(recipe_counts['count'])\n",
"value = list(recipe_counts[\"count\"])\n",
"\n",
"# Format labels with node names and counts\n",
"\n",
"\n",
"def getCount(node):\n",
" left = recipe_counts[recipe_counts['recipe'] == node]['count'].sum()\n",
" left = recipe_counts[recipe_counts[\"recipe\"] == node][\"count\"].sum()\n",
" if left == 0:\n",
" return recipe_counts[recipe_counts['parentRecipe'] == node]['count'].sum()\n",
" return recipe_counts[recipe_counts[\"parentRecipe\"] == node][\"count\"].sum()\n",
" else:\n",
" return left\n",
" \n",
"\n",
"\n",
"formatted_labels = [f\"{node} - {getCount(node)}\" for node in all_nodes]\n",
"\n",
"# Create the Sankey diagram\n",
"fig = go.Figure(data=[go.Sankey(\n",
" node=dict(\n",
" pad=15,\n",
" thickness=15,\n",
" line=dict(color=\"black\", width=0.5),\n",
" label=formatted_labels\n",
" ),\n",
" link=dict(\n",
" source=source,\n",
" target=target,\n",
" value=value,\n",
" \n",
" ))])\n",
"fig = go.Figure(\n",
" data=[\n",
" go.Sankey(\n",
" node=dict(\n",
" pad=15,\n",
" thickness=15,\n",
" line=dict(color=\"black\", width=0.5),\n",
" label=formatted_labels,\n",
" ),\n",
" link=dict(\n",
" source=source,\n",
" target=target,\n",
" value=value,\n",
" ),\n",
" )\n",
" ]\n",
")\n",
"\n",
"# Update layout\n",
"fig.update_layout(\n",
" title_text=\"Recipes that made changes\",\n",
" font_size=10,\n",
" # height=max(len(all_nodes)*15, 400),\n",
" width=800,\n",
" height=800\n",
" height=800,\n",
")\n",
"\n",
"fig.show()\n"
"fig.show()"
]
}
],
Expand Down
2 changes: 0 additions & 2 deletions moderne_visualizations_misc/recipe_performance.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
},
"outputs": [],
"source": [
"\n",
"from code_data_science import data_table as dt\n",
"\n",
"df = dt.read_csv(\"../samples/recipe_performance.csv\")\n",
Expand Down Expand Up @@ -72,7 +71,6 @@
"metadata": {},
"outputs": [],
"source": [
"\n",
"fig = px.bar(\n",
" grouped,\n",
" x=byTotalTime,\n",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
type: specs.moderne.io/v1beta/visualization
name: io.moderne.CompositeRecipeResultsSankey
displayName: Composite Recipe Results
displayName: Composite recipe results
description: >
The number of source files affected by each recipe. Recipes can be composed in hierarchical forms. The representation below is a Sankey diagram. \
Expand Down

0 comments on commit e00bdcf

Please sign in to comment.