janosh · janosh · May 18, 2023 · May 18, 2023 · May 18, 2023 · May 18, 2023
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -7,7 +7,7 @@ default_install_hook_types: [pre-commit, commit-msg]
 
 repos:
   - repo: https://github.com/charliermarsh/ruff-pre-commit
-    rev: v0.0.265
+    rev: v0.0.269
     hooks:
       - id: ruff
         args: [--fix]
@@ -34,7 +34,7 @@ repos:
       - id: trailing-whitespace
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.2.0
+    rev: v1.3.0
     hooks:
       - id: mypy
         additional_dependencies: [types-pyyaml, types-requests]

diff --git a/data/wbm/readme.md b/data/wbm/readme.md
@@ -104,6 +104,6 @@ Element counts for MP training set consisting of 146,323 `ComputedStructureEntri
 
 ## 📊 &thinsp; Symmetry Statistics
 
-With one exception, MP and WBM have diverse representation across all 7 crystal systems. In MP, monoclinic (23%) and orthorhombic (21%) are most prevalent. In WBM, orthorhombic and tetragonal each make up 20%. Triclinic crystals are notably almost absent from WBM at just 1% prevalence, but well represented in MP (15%). Combined with the higher share of cubic structures in WBM (19% vs 14%), WBM structures have overall higher symmetry. This should benefit a model like Wrenformer reliant on symmetries to encode coarse-grained structural features. See [SI](/si#spacegroup-prevalence-in-wrenformer-failure-cases) for a failure case of this featurization.
+Both the MP training and WBM test sets have good coverage of all 7 crystal systems, triclinic crystals being the only notable exception at just 1% prevalence in WBM but still well represented in MP (15%). In MP, monoclinic (23%) and orthorhombic (21%) are most prevalent. In WBM, orthorhombic and tetragonal each make up 20%. Combined with the higher share of cubic structures in WBM (19% vs 14%), WBM structures have overall higher symmetry. This should benefit a model like Wrenformer reliant on symmetries to encode coarse-grained structural features. See [SI](/si#spacegroup-prevalence-in-wrenformer-failure-cases) for a failure case of this featurization.
 
 <slot name="spacegroup-sunbursts" />
diff --git a/pyproject.toml b/pyproject.toml
@@ -81,7 +81,7 @@ universal = true
 target-version = "py39"
 select = [
   "B",   # flake8-bugbear
-  "C40", # flake8-comprehensions
+  "C4",  # flake8-comprehensions
   "D",   # pydocstyle
   "E",   # pycodestyle error
   "F",   # pyflakes

diff --git a/scripts/analyze_model_disagreement.py b/scripts/analyze_model_disagreement.py
@@ -0,0 +1,94 @@
+"""Check if points with large error compared to DFT but little disagreement between
+models can pinpoint DFT calculations gone wrong.
+"""
+
+
+# %%
+import pandas as pd
+from crystal_toolkit.helpers.utils import hook_up_fig_with_struct_viewer
+from pymatviz.utils import add_identity_line, save_fig
+
+from matbench_discovery import FIGS
+from matbench_discovery.data import DATA_FILES
+from matbench_discovery.preds import (
+    df_preds,
+    each_true_col,
+    model_mean_each_col,
+    model_mean_err_col,
+    model_std_col,
+)
+
+__author__ = "Janosh Riebesell"
+__date__ = "2023-02-15"
+
+
+# %% scatter plot of largest model errors vs. DFT hull distance
+# while some points lie on a horizontal line of constant error, more follow the identity
+# line showing models are biased to predict low energies likely as a result of training
+# on MP which is highly low-energy enriched.
+# also possible models failed to learn whatever physics makes these materials highly
+# unstable
+
+# material class name -> regex applied with Series.str.match, i.e. anchored at the
+# start of the formula (hence the leading .*). An element counts as present when its
+# symbol is directly followed by a digit.
+# NOTE(review): this assumes formulas spell out every count (e.g. "O1") - confirm
+material_classes = {
+    "all": r".*",
+    "oxides": r".*O\d.*",
+    "nitrides": r".*N\d.*",
+    "sulfides": r".*S\d.*",
+    # multi-letter symbols need alternation: a character class such as [FClBrI]
+    # matches single characters only and would also accept C\d and B\d
+    "halides": r".*(?:F|Cl|Br|I)\d.*",
+    "pnictides": r".*(?:As|Sb|Bi)\d.*",
+    "chalcogenides": r".*(?:Se|Te)\d.*",
+    "borides": r".*B\d.*",
+    "carbides": r".*C\d.*",
+    "hydrides": r".*H\d.*",
+    # lookaheads require both O and N (a plain [ON] class would accept either one)
+    "oxynitrides": r"(?=.*O\d)(?=.*N\d).*",
+}
+# max number of largest-error structures plotted per material class
+n_structs = 200
+
+# one figure per material class: DFT vs. predicted hull distance for the structures
+# with the largest model_mean_err_col values, colored by model_std_col
+for material_cls, pattern in material_classes.items():
+    # na=False treats missing formulas as non-matching so the mask stays boolean
+    df_subset = df_preds[df_preds["formula"].str.match(pattern, na=False)]
+    df_plot = df_subset.nlargest(n_structs, model_mean_err_col).round(2)
+
+    fig = df_plot.plot.scatter(
+        x=each_true_col,
+        y=model_mean_each_col,
+        color=model_std_col,
+        size="n_sites",
+        backend="plotly",
+        hover_name="material_id",
+        hover_data=["formula"],
+        color_continuous_scale="Turbo",
+        # range_color=[0, df_plot[model_std_col].max()],
+    )
+    # for horizontal colorbar
+    # yanchor="bottom", y=1, xanchor="center", x=0.5, orientation="h", thickness=12
+    fig.layout.coloraxis.colorbar.update(title_side="right", thickness=14)
+    fig.layout.margin.update(l=0, r=30, b=0, t=60)
+    add_identity_line(fig)
+    fig.layout.title.update(
+        # report the number of points actually shown: a class may have fewer than
+        # n_structs matching structures
+        text=f"{len(df_plot)} largest {material_cls} model errors: Predicted vs.<br>"
+        "DFT hull distance colored by model disagreement",
+        x=0.5,
+    )
+    # tried setting error_y=model_std_col but looks bad
+    # fig.update_traces(
+    #     error_y=dict(color="rgba(255,255,255,0.2)", width=3, thickness=2)
+    # )
+    fig.show()
+    img_name = f"scatter-largest-errors-models-mean-vs-true-hull-dist-{material_cls}"
+    save_fig(fig, f"{FIGS}/{img_name}.svelte")
+
+
+# %%
+# presumably the WBM computed structure entries plus initial structures (going by the
+# DATA_FILES key), indexed by material_id
+df_cse = pd.read_json(DATA_FILES.wbm_cses_plus_init_structs).set_index("material_id")
+
+
+# %% struct viewer
+# NOTE: fig is whatever the plotting loop assigned last, i.e. the figure for the final
+# entry of material_classes
+app = hook_up_fig_with_struct_viewer(
+    fig,
+    df_cse,
+    "initial_structure",
+    # validate_id requires material_id to be hover_name
+    # parameter named mat_id rather than id to avoid shadowing the builtin
+    validate_id=lambda mat_id: mat_id.startswith(("wbm-", "mp-", "mvc-")),
+)
+app.run(port=8000)
diff --git a/scripts/analyze_model_failure_cases.py b/scripts/analyze_model_failure_cases.py
@@ -13,7 +13,7 @@
 import plotly.graph_objs as go
 from pymatgen.core import Composition, Structure
 from pymatviz import count_elements, plot_structure_2d, ptable_heatmap_plotly
-from pymatviz.utils import add_identity_line, save_fig
+from pymatviz.utils import save_fig
 from tqdm import tqdm
 
 from matbench_discovery import FIGS, ROOT
@@ -25,9 +25,7 @@
     df_metrics,
     df_preds,
     each_true_col,
-    model_mean_each_col,
     model_mean_err_col,
-    model_std_col,
 )
 
 __author__ = "Janosh Riebesell"
@@ -172,44 +170,6 @@
 df_preds[n_examp_for_rarest_elem_col] = df_wbm[n_examp_for_rarest_elem_col]
 
 
-# %% scatter plot of largest model errors vs. DFT hull distance
-# while some points lie on a horizontal line of constant error, more follow the identity
-# line showing models are biased to predict low energies likely as a result of training
-# on MP which is highly low-energy enriched.
-# also possible models failed to learn whatever physics makes these materials highly
-# unstable
-n_structs = 200
-fig = (
-    df_preds.nlargest(n_structs, model_mean_err_col)
-    .round(2)
-    .plot.scatter(
-        x=each_true_col,
-        y=model_mean_each_col,
-        color=model_std_col,
-        size="n_sites",
-        backend="plotly",
-        hover_name="material_id",
-        hover_data=["formula"],
-        color_continuous_scale="Turbo",
-    )
-)
-# yanchor="bottom", y=1, xanchor="center", x=0.5, orientation="h", thickness=12
-fig.layout.coloraxis.colorbar.update(title_side="right", thickness=14)
-fig.layout.margin.update(l=0, r=30, b=0, t=30)
-add_identity_line(fig)
-fig.layout.title.update(
-    text=f"{n_structs} largest model errors: Predicted vs. DFT hull distance<br>"
-    "colored by model disagreement",
-    x=0.5,
-)
-# tried setting error_y=model_std_col but looks bad
-# fig.update_traces(error_y=dict(color="rgba(255,255,255,0.2)", width=3, thickness=2))
-fig.show()
-img_name = "scatter-largest-errors-models-mean-vs-true-hull-dist"
-save_fig(fig, f"{FIGS}/{img_name}.svelte")
-# save_fig(fig, f"{ROOT}/tmp/figs/{img_name}.pdf")
-
-
 # %% find materials that were misclassified by all models
 for model in df_each_pred:
     true_pos, false_neg, false_pos, true_neg = classify_stable(

diff --git a/site/package.json b/site/package.json
@@ -21,8 +21,8 @@
     "@sveltejs/adapter-static": "^2.0.2",
     "@sveltejs/kit": "^1.16.3",
     "@sveltejs/vite-plugin-svelte": "^2.2.0",
-    "@typescript-eslint/eslint-plugin": "^5.59.5",
-    "@typescript-eslint/parser": "^5.59.5",
+    "@typescript-eslint/eslint-plugin": "^5.59.6",
+    "@typescript-eslint/parser": "^5.59.6",
     "elementari": "^0.1.8",
     "eslint": "^8.40.0",
     "eslint-plugin-svelte3": "^4.0.0",
@@ -38,14 +38,14 @@
     "remark-math": "3.0.0",
     "svelte": "^3.59.1",
     "svelte-check": "^3.3.2",
-    "svelte-multiselect": "^8.6.1",
+    "svelte-multiselect": "^8.6.2",
     "svelte-preprocess": "^5.0.3",
     "svelte-toc": "^0.5.5",
     "svelte-zoo": "^0.4.5",
     "svelte2tsx": "^0.6.14",
     "tslib": "^2.5.0",
     "typescript": "5.0.4",
-    "vite": "^4.3.5"
+    "vite": "^4.3.7"
   },
   "prettier": {
     "semi": false,

diff --git a/site/src/app.html b/site/src/app.html
@@ -15,7 +15,7 @@
     <meta name="viewport" content="width=device-width,initial-scale=1" />
 
     <meta property="og:type" content="article" />
-    <meta property="og:image" content="favicon.svg" />
+    <meta property="og:image" content="/favicon.svg" />
     <meta
       property="og:url"
       content="https://janosh.github.io/matbench-discovery"

diff --git a/...rors-models-mean-vs-true-hull-dist.svelte → ...-models-mean-vs-true-hull-dist-all.svelte b/...rors-models-mean-vs-true-hull-dist.svelte → ...-models-mean-vs-true-hull-dist-all.svelte
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-borides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-borides.svelte
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-carbides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-carbides.svelte
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-chalcogenides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-chalcogenides.svelte
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-halides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-halides.svelte
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-hydrides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-hydrides.svelte
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-nitrides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-nitrides.svelte
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxides.svelte
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxynitrides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxynitrides.svelte
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-pnictides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-pnictides.svelte
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-sulfides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-sulfides.svelte
diff --git a/site/src/routes/models/element-errors-ptable-heatmap.svelte b/site/src/routes/models/element-errors-ptable-heatmap.svelte
@@ -12,7 +12,7 @@
   export let current_model: string[] = [models[2]]
   export let manual_cbar_max: boolean = false
   export let normalized: boolean = true
-  export let cbar_max: number | null = 0.03
+  export let cbar_max: number | null = 0.3
 
   const test_set_std_key = Object.keys(per_elem_errors).find((key) =>
     key.includes(`Test set standard deviation`)
@@ -61,7 +61,7 @@ convex hull.
       disabled={!manual_cbar_max}
       bind:value={cbar_max}
       min={0.01}
-      max={0.15}
+      max={0.7}
       step={0.001}
     />
     {cbar_max}

diff --git a/site/src/routes/preprint/+page.md b/site/src/routes/preprint/+page.md
@@ -243,7 +243,9 @@ We welcome further model submissions at
 
 Janosh Riebesell acknowledges support from the German Academic Scholarship Foundation ([Studienstiftung](https://wikipedia.org/wiki/Studienstiftung)) and gracious hosting as a visiting affiliate in the groups of Kristin Persson and Anubhav Jain.
 
-We would like to thank Jason Blake Gibson, Shyue Ping Ong, Chi Chen, Ekin Dogus Cubuk, Bowen Deng, Tian Xie and Ryota Tomioka for helpful discussions. We also thank Hai-Chen Wang and co-authors for providing the initial structures for the WBM data set @wang_predicting_2021.
+We would like to thank Jason Blake Gibson, Shyue Ping Ong, Chi Chen, Tian Xie, Bowen Deng, Peichen Zhong and Ekin Dogus Cubuk for helpful discussions. We also thank Hai-Chen Wang and co-authors for creating and freely providing the WBM data set @wang_predicting_2021.
+
+Thanks also to [@pbenner](https://github.com/pbenner) for [finding and reporting many bugs]({repo}/issues?q=is%3Aissue+author%3Apbenner+) in the data loading and caching routines prior to the v1 release.
 
 ## Author Contributions
 

diff --git a/site/src/routes/preprint/iclr-ml4mat/+page.md b/site/src/routes/preprint/iclr-ml4mat/+page.md
@@ -144,7 +144,9 @@ We welcome further model submissions as well as data contributions for version 2
 
 Janosh Riebesell acknowledges support from the German Academic Scholarship Foundation ([Studienstiftung](https://wikipedia.org/wiki/Studienstiftung)) and gracious hosting as a visiting affiliate in the groups of Kristin Persson and Anubhav Jain.
 
-We would like to thank Jason Blake Gibson, Shyue Ping Ong, Chi Chen, Ekin Dogus Cubuk, Bowen Deng, Tian Xie and Ryota Tomioka for helpful discussions. We also thank Hai-Chen Wang and co-authors for providing the initial structures for the WBM data set.
+We would like to thank Jason Blake Gibson, Shyue Ping Ong, Chi Chen, Tian Xie, Bowen Deng, Peichen Zhong and Ekin Dogus Cubuk for helpful discussions. We also thank Hai-Chen Wang and co-authors for creating and freely providing the WBM data set.
+
+Thanks also to [@pbenner](https://github.com/pbenner) for [finding and reporting many bugs]({repo}/issues?q=is%3Aissue+author%3Apbenner+) in the data loading and caching routines prior to the v1 release.
 
 ## Author Contributions
 

diff --git a/site/src/routes/si/+page.md b/site/src/routes/si/+page.md
@@ -14,7 +14,7 @@
   import HistClfPredHullDistModels from '$figs/hist-clf-pred-hull-dist-models-4x2.svelte'
   import SpacegroupSunburstWbm from '$figs/spacegroup-sunburst-wbm.svelte'
   import SpacegroupSunburstWrenformerFailures from '$figs/spacegroup-sunburst-wrenformer-failures.svelte'
-  import ScatterLargestErrorsModelsMeanVsTrueHullDist from '$figs/scatter-largest-errors-models-mean-vs-true-hull-dist.svelte'
+  import LargestErrorScatterSelect from './largest-error-scatter-select.svelte'
   import EAboveHullScatterWrenformerFailures from '$figs/e-above-hull-scatter-wrenformer-failures.svelte'
   import ProtoCountsWrenformerFailures from '$figs/proto-counts-wrenformer-failures.svelte'
   import ElementPrevalenceVsError from '$figs/element-prevalence-vs-error.svelte'
@@ -99,7 +99,7 @@ Given its strong performance on batch 1, it is possible that given sufficiently
 ## Largest Errors vs DFT Hull Distance
 
 {#if mounted}
-<ScatterLargestErrorsModelsMeanVsTrueHullDist />
+<LargestErrorScatterSelect />
 {/if}
 
 > @label:fig:scatter-largest-errors-models-mean-vs-true-hull-dist DFT vs predicted hull distance (average over all models) for the 200 largest error structures colored by model disagreement (as measured by standard deviation in hull distance predictions from different models) and sized by number of atoms in the structures. This plot shows that high-error predictions are biased towards predicting too small hull distance. This is unsurprising considering MP training data mainly consists of low-energy structure.<br>

diff --git a/site/src/routes/si/largest-error-scatter-select.svelte b/site/src/routes/si/largest-error-scatter-select.svelte
@@ -0,0 +1,29 @@
+<script lang="ts">
+  import Select from 'svelte-multiselect'
+
+  export let disabled: boolean = false
+
+  // eagerly import every per-material-class scatter figure; keys are used as options
+  const figs = import.meta.glob(
+    `$figs/scatter-largest-errors-models-mean-vs-true-hull-dist-*.svelte`,
+    { eager: true }
+  )
+  const fig_paths = Object.keys(figs)
+
+  // kept as an array for bind:selected; min/maxSelect={1} pin it to one entry
+  let selected: string[] = [fig_paths[0]]
+
+  // dropdown label = text after the last dash of the key, up to the first dot
+  const material_class = (path: string): string =>
+    path.split(`-`).slice(-1)[0].split(`.`)[0]
+</script>
+
+<Select options={fig_paths} bind:selected minSelect={1} maxSelect={1} {disabled}>
+  <span let:option slot="selected">
+    {material_class(option)}
+  </span>
+  <span let:option slot="option">
+    {material_class(option)}
+  </span>
+</Select>
+
+<svelte:component this={figs[selected[0]]?.default} />
+
+<style>
+  span {
+    text-transform: capitalize;
+  }
+</style>