diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5ad6a4fd..a63fc9ff 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ default_install_hook_types: [pre-commit, commit-msg] repos: - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.265 + rev: v0.0.269 hooks: - id: ruff args: [--fix] @@ -34,7 +34,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.2.0 + rev: v1.3.0 hooks: - id: mypy additional_dependencies: [types-pyyaml, types-requests] diff --git a/data/wbm/readme.md b/data/wbm/readme.md index a6aa9dd4..f8010af3 100644 --- a/data/wbm/readme.md +++ b/data/wbm/readme.md @@ -104,6 +104,6 @@ Element counts for MP training set consisting of 146,323 `ComputedStructureEntri ## 📊   Symmetry Statistics -With one exception, MP and WBM have diverse representation across all 7 crystal systems. In MP, monoclinic (23%) and orthorhombic (21%) are most prevalent. In WBM, orthorhombic and tetragonal each make up 20%. Triclinic crystals are notably almost absent from WBM at just 1% prevalence, but well represented in MP (15%). Combined with the higher share of cubic structures in WBM (19% vs 14%), WBM structures have overall higher symmetry. This should benefit a model like Wrenformer reliant on symmetries to encode coarse-grained structural features. See [SI](/si#spacegroup-prevalence-in-wrenformer-failure-cases) for a failure case of this featurization. +Both the MP training and WBM test set have good coverage of all 7 crystal systems, triclinic crystals being the only notable exception at just 1% prevalence in WBM but still well represented in MP (15%). In MP, monoclinic (23%) and orthorhombic (21%) are most prevalent. In WBM, orthorhombic and tetragonal each make up 20%. Combined with the higher share of cubic structures in WBM (19% vs 14%), WBM structures have overall higher symmetry. 
This should benefit a model like Wrenformer reliant on symmetries to encode coarse-grained structural features. See [SI](/si#spacegroup-prevalence-in-wrenformer-failure-cases) for a failure case of this featurization. diff --git a/pyproject.toml b/pyproject.toml index 01980894..4f30103b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,7 +81,7 @@ universal = true target-version = "py39" select = [ "B", # flake8-bugbear - "C40", # flake8-comprehensions + "C4", # flake8-comprehensions "D", # pydocstyle "E", # pycodestyle error "F", # pyflakes diff --git a/scripts/analyze_model_disagreement.py b/scripts/analyze_model_disagreement.py new file mode 100644 index 00000000..9b295867 --- /dev/null +++ b/scripts/analyze_model_disagreement.py @@ -0,0 +1,94 @@ +"""Check if points with large error compared to DFT but little disagreement between +models can pinpoint DFT calculation gone wrong. +""" + + +# %% +import pandas as pd +from crystal_toolkit.helpers.utils import hook_up_fig_with_struct_viewer +from pymatviz.utils import add_identity_line, save_fig + +from matbench_discovery import FIGS +from matbench_discovery.data import DATA_FILES +from matbench_discovery.preds import ( + df_preds, + each_true_col, + model_mean_each_col, + model_mean_err_col, + model_std_col, +) + +__author__ = "Janosh Riebesell" +__date__ = "2023-02-15" + + +# %% scatter plot of largest model errors vs. DFT hull distance +# while some points lie on a horizontal line of constant error, more follow the identity +# line showing models are biased to predict low energies likely as a result of training +# on MP which is highly low-energy enriched. 
+# also possible models failed to learn whatever physics makes these materials highly +# unstable + +material_classes = { + "all": r".*", + "oxides": r".*O\d.*", + "nitrides": r".*N\d.*", + "sulfides": r".*S\d.*", + "halides": r".*[FClBrI]\d.*", + "pnictides": r".*[AsSbBi]\d.*", + "chalcogenides": r".*[SeTe]\d.*", + "borides": r".*B\d.*", + "carbides": r".*C\d.*", + "hydrides": r".*H\d.*", + "oxynitrides": r".*[ON]\d.*", +} +n_structs = 200 + +for material_cls, pattern in material_classes.items(): + df_subset = df_preds[df_preds["formula"].str.match(pattern)] + df_plot = df_subset.nlargest(n_structs, model_mean_err_col).round(2) + + fig = df_plot.plot.scatter( + x=each_true_col, + y=model_mean_each_col, + color=model_std_col, + size="n_sites", + backend="plotly", + hover_name="material_id", + hover_data=["formula"], + color_continuous_scale="Turbo", + # range_color=[0, df_plot[model_std_col].max()], + ) + # for horizontal colorbar + # yanchor="bottom", y=1, xanchor="center", x=0.5, orientation="h", thickness=12 + fig.layout.coloraxis.colorbar.update(title_side="right", thickness=14) + fig.layout.margin.update(l=0, r=30, b=0, t=60) + add_identity_line(fig) + fig.layout.title.update( + text=f"{n_structs} largest {material_cls} model errors: Predicted vs.
" + "DFT hull distance colored by model disagreement", + x=0.5, + ) + # tried setting error_y=model_std_col but looks bad + # fig.update_traces( + # error_y=dict(color="rgba(255,255,255,0.2)", width=3, thickness=2) + # ) + fig.show() + img_name = f"scatter-largest-errors-models-mean-vs-true-hull-dist-{material_cls}" + save_fig(fig, f"{FIGS}/{img_name}.svelte") + # save_fig(fig, f"{ROOT}/tmp/figs/{img_name}.pdf") + + +# %% +df_cse = pd.read_json(DATA_FILES.wbm_cses_plus_init_structs).set_index("material_id") + + +# %% struct viewer +app = hook_up_fig_with_struct_viewer( + fig, + df_cse, + "initial_structure", + # validate_id requires material_id to be hover_name + validate_id=lambda id: id.startswith(("wbm-", "mp-", "mvc-")), +) +app.run(port=8000) diff --git a/scripts/analyze_model_failure_cases.py b/scripts/analyze_model_failure_cases.py index 290e58dd..6813122e 100644 --- a/scripts/analyze_model_failure_cases.py +++ b/scripts/analyze_model_failure_cases.py @@ -13,7 +13,7 @@ import plotly.graph_objs as go from pymatgen.core import Composition, Structure from pymatviz import count_elements, plot_structure_2d, ptable_heatmap_plotly -from pymatviz.utils import add_identity_line, save_fig +from pymatviz.utils import save_fig from tqdm import tqdm from matbench_discovery import FIGS, ROOT @@ -25,9 +25,7 @@ df_metrics, df_preds, each_true_col, - model_mean_each_col, model_mean_err_col, - model_std_col, ) __author__ = "Janosh Riebesell" @@ -172,44 +170,6 @@ df_preds[n_examp_for_rarest_elem_col] = df_wbm[n_examp_for_rarest_elem_col] -# %% scatter plot of largest model errors vs. DFT hull distance -# while some points lie on a horizontal line of constant error, more follow the identity -# line showing models are biased to predict low energies likely as a result of training -# on MP which is highly low-energy enriched. 
-# also possible models failed to learn whatever physics makes these materials highly -# unstable -n_structs = 200 -fig = ( - df_preds.nlargest(n_structs, model_mean_err_col) - .round(2) - .plot.scatter( - x=each_true_col, - y=model_mean_each_col, - color=model_std_col, - size="n_sites", - backend="plotly", - hover_name="material_id", - hover_data=["formula"], - color_continuous_scale="Turbo", - ) -) -# yanchor="bottom", y=1, xanchor="center", x=0.5, orientation="h", thickness=12 -fig.layout.coloraxis.colorbar.update(title_side="right", thickness=14) -fig.layout.margin.update(l=0, r=30, b=0, t=30) -add_identity_line(fig) -fig.layout.title.update( - text=f"{n_structs} largest model errors: Predicted vs. DFT hull distance
" - "colored by model disagreement", - x=0.5, -) -# tried setting error_y=model_std_col but looks bad -# fig.update_traces(error_y=dict(color="rgba(255,255,255,0.2)", width=3, thickness=2)) -fig.show() -img_name = "scatter-largest-errors-models-mean-vs-true-hull-dist" -save_fig(fig, f"{FIGS}/{img_name}.svelte") -# save_fig(fig, f"{ROOT}/tmp/figs/{img_name}.pdf") - - # %% find materials that were misclassified by all models for model in df_each_pred: true_pos, false_neg, false_pos, true_neg = classify_stable( diff --git a/site/package.json b/site/package.json index 42424f03..56169ad6 100644 --- a/site/package.json +++ b/site/package.json @@ -21,8 +21,8 @@ "@sveltejs/adapter-static": "^2.0.2", "@sveltejs/kit": "^1.16.3", "@sveltejs/vite-plugin-svelte": "^2.2.0", - "@typescript-eslint/eslint-plugin": "^5.59.5", - "@typescript-eslint/parser": "^5.59.5", + "@typescript-eslint/eslint-plugin": "^5.59.6", + "@typescript-eslint/parser": "^5.59.6", "elementari": "^0.1.8", "eslint": "^8.40.0", "eslint-plugin-svelte3": "^4.0.0", @@ -38,14 +38,14 @@ "remark-math": "3.0.0", "svelte": "^3.59.1", "svelte-check": "^3.3.2", - "svelte-multiselect": "^8.6.1", + "svelte-multiselect": "^8.6.2", "svelte-preprocess": "^5.0.3", "svelte-toc": "^0.5.5", "svelte-zoo": "^0.4.5", "svelte2tsx": "^0.6.14", "tslib": "^2.5.0", "typescript": "5.0.4", - "vite": "^4.3.5" + "vite": "^4.3.7" }, "prettier": { "semi": false, diff --git a/site/src/app.html b/site/src/app.html index 63df4c54..f812a2b0 100644 --- a/site/src/app.html +++ b/site/src/app.html @@ -15,7 +15,7 @@ - +
+
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-borides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-borides.svelte new file mode 100644 index 00000000..bbd2ac25 --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-borides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-carbides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-carbides.svelte new file mode 100644 index 00000000..93ad41db --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-carbides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-chalcogenides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-chalcogenides.svelte new file mode 100644 index 00000000..6ad130c1 --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-chalcogenides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-halides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-halides.svelte new file mode 100644 index 00000000..46c67297 --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-halides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-hydrides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-hydrides.svelte new file mode 100644 index 00000000..4742ad7e --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-hydrides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-nitrides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-nitrides.svelte new file mode 100644 index 00000000..6c7e1acc --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-nitrides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxides.svelte new file mode 100644 index 00000000..21e4787e --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxynitrides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxynitrides.svelte new file mode 100644 index 00000000..be6a293f --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxynitrides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-pnictides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-pnictides.svelte new file mode 100644 index 00000000..91a9fe00 --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-pnictides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-sulfides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-sulfides.svelte new file mode 100644 index 00000000..07bdd23a --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-sulfides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/routes/models/element-errors-ptable-heatmap.svelte b/site/src/routes/models/element-errors-ptable-heatmap.svelte index 7d88d31e..dfe60968 100644 --- a/site/src/routes/models/element-errors-ptable-heatmap.svelte +++ b/site/src/routes/models/element-errors-ptable-heatmap.svelte @@ -12,7 +12,7 @@ export let current_model: string[] = [models[2]] export let manual_cbar_max: boolean = false export let normalized: boolean = true - export let cbar_max: number | null = 0.03 + export let cbar_max: number | null = 0.3 const test_set_std_key = Object.keys(per_elem_errors).find((key) => key.includes(`Test set standard deviation`) @@ -61,7 +61,7 @@ convex hull. disabled={!manual_cbar_max} bind:value={cbar_max} min={0.01} - max={0.15} + max={0.7} step={0.001} /> {cbar_max} diff --git a/site/src/routes/preprint/+page.md b/site/src/routes/preprint/+page.md index a19aaad7..e080ee10 100644 --- a/site/src/routes/preprint/+page.md +++ b/site/src/routes/preprint/+page.md @@ -243,7 +243,9 @@ We welcome further model submissions at Janosh Riebesell acknowledges support from the German Academic Scholarship Foundation ([Studienstiftung](https://wikipedia.org/wiki/Studienstiftung)) and gracious hosting as a visiting affiliate in the groups of Kristin Persson and Anubhav Jain. -We would like to thank Jason Blake Gibson, Shyue Ping Ong, Chi Chen, Ekin Dogus Cubuk, Bowen Deng, Tian Xie and Ryota Tomioka for helpful discussions. We also thank Hai-Chen Wang and co-authors for providing the initial structures for the WBM data set @wang_predicting_2021. +We would like to thank Jason Blake Gibson, Shyue Ping Ong, Chi Chen, Tian Xie, Bowen Deng, Peichen Zhong and Ekin Dogus Cubuk for helpful discussions. We also thank Hai-Chen Wang and co-authors for creating and freely providing the WBM data set @wang_predicting_2021. 
+ +Thanks also to [@pbenner](https://github.com/pbenner) for [finding and reporting many bugs]({repo}/issues?q=is%3Aissue+author%3Apbenner+) in the data loading and caching routines prior to the v1 release. ## Author Contributions diff --git a/site/src/routes/preprint/iclr-ml4mat/+page.md b/site/src/routes/preprint/iclr-ml4mat/+page.md index ccd3dec9..d146b657 100644 --- a/site/src/routes/preprint/iclr-ml4mat/+page.md +++ b/site/src/routes/preprint/iclr-ml4mat/+page.md @@ -144,7 +144,9 @@ We welcome further model submissions as well as data contributions for version 2 Janosh Riebesell acknowledges support from the German Academic Scholarship Foundation ([Studienstiftung](https://wikipedia.org/wiki/Studienstiftung)) and gracious hosting as a visiting affiliate in the groups of Kristin Persson and Anubhav Jain. -We would like to thank Jason Blake Gibson, Shyue Ping Ong, Chi Chen, Ekin Dogus Cubuk, Bowen Deng, Tian Xie and Ryota Tomioka for helpful discussions. We also thank Hai-Chen Wang and co-authors for providing the initial structures for the WBM data set. +We would like to thank Jason Blake Gibson, Shyue Ping Ong, Chi Chen, Tian Xie, Bowen Deng, Peichen Zhong and Ekin Dogus Cubuk for helpful discussions. We also thank Hai-Chen Wang and co-authors for creating and freely providing the WBM data set. + +Thanks also to [@pbenner](https://github.com/pbenner) for [finding and reporting many bugs]({repo}/issues?q=is%3Aissue+author%3Apbenner+) in the data loading and caching routines prior to the v1 release. 
## Author Contributions diff --git a/site/src/routes/si/+page.md b/site/src/routes/si/+page.md index bee6f050..c646fa9b 100644 --- a/site/src/routes/si/+page.md +++ b/site/src/routes/si/+page.md @@ -14,7 +14,7 @@ import HistClfPredHullDistModels from '$figs/hist-clf-pred-hull-dist-models-4x2.svelte' import SpacegroupSunburstWbm from '$figs/spacegroup-sunburst-wbm.svelte' import SpacegroupSunburstWrenformerFailures from '$figs/spacegroup-sunburst-wrenformer-failures.svelte' - import ScatterLargestErrorsModelsMeanVsTrueHullDist from '$figs/scatter-largest-errors-models-mean-vs-true-hull-dist.svelte' + import LargestErrorScatterSelect from './largest-error-scatter-select.svelte' import EAboveHullScatterWrenformerFailures from '$figs/e-above-hull-scatter-wrenformer-failures.svelte' import ProtoCountsWrenformerFailures from '$figs/proto-counts-wrenformer-failures.svelte' import ElementPrevalenceVsError from '$figs/element-prevalence-vs-error.svelte' @@ -99,7 +99,7 @@ Given its strong performance on batch 1, it is possible that given sufficiently ## Largest Errors vs DFT Hull Distance {#if mounted} - + {/if} > @label:fig:scatter-largest-errors-models-mean-vs-true-hull-dist DFT vs predicted hull distance (average over all models) for the 200 largest error structures colored by model disagreement (as measured by standard deviation in hull distance predictions from different models) and sized by number of atoms in the structures. This plot shows that high-error predictions are biased towards predicting too small hull distance. This is unsurprising considering MP training data mainly consists of low-energy structure.
diff --git a/site/src/routes/si/largest-error-scatter-select.svelte b/site/src/routes/si/largest-error-scatter-select.svelte new file mode 100644 index 00000000..a6c44743 --- /dev/null +++ b/site/src/routes/si/largest-error-scatter-select.svelte @@ -0,0 +1,29 @@ + + + + + + +