From 76139c71ddb6ef5bdd8cc67196613aab3d3d22ec Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Thu, 18 May 2023 15:19:51 -0700 Subject: [PATCH 1/8] split analyze_model_disagreement.py out of analyze_model_failure_cases.py --- data/wbm/readme.md | 2 +- pyproject.toml | 2 +- scripts/analyze_model_disagreement.py | 93 ++++++++++++++++++++++++++ scripts/analyze_model_failure_cases.py | 42 +----------- 4 files changed, 96 insertions(+), 43 deletions(-) create mode 100644 scripts/analyze_model_disagreement.py diff --git a/data/wbm/readme.md b/data/wbm/readme.md index a6aa9dd4..f8010af3 100644 --- a/data/wbm/readme.md +++ b/data/wbm/readme.md @@ -104,6 +104,6 @@ Element counts for MP training set consisting of 146,323 `ComputedStructureEntri ## 📊   Symmetry Statistics -With one exception, MP and WBM have diverse representation across all 7 crystal systems. In MP, monoclinic (23%) and orthorhombic (21%) are most prevalent. In WBM, orthorhombic and tetragonal each make up 20%. Triclinic crystals are notably almost absent from WBM at just 1% prevalence, but well represented in MP (15%). Combined with the higher share of cubic structures in WBM (19% vs 14%), WBM structures have overall higher symmetry. This should benefit a model like Wrenformer reliant on symmetries to encode coarse-grained structural features. See [SI](/si#spacegroup-prevalence-in-wrenformer-failure-cases) for a failure case of this featurization. +Both the MP training and WBM test set have good coverage of all 7 crystal systems, triclinic crystals being the only notable exception at just 1% prevalence in WBM but still well represented in MP (15%). In MP, monoclinic (23%) and orthorhombic (21%) are most prevalent. In WBM, orthorhombic and tetragonal each make up 20%. Combined with the higher share of cubic structures in WBM (19% vs 14%), WBM structures have overall higher symmetry. This should benefit a model like Wrenformer reliant on symmetries to encode coarse-grained structural features. 
See [SI](/si#spacegroup-prevalence-in-wrenformer-failure-cases) for a failure case of this featurization. diff --git a/pyproject.toml b/pyproject.toml index 01980894..4f30103b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,7 +81,7 @@ universal = true target-version = "py39" select = [ "B", # flake8-bugbear - "C40", # flake8-comprehensions + "C4", # flake8-comprehensions "D", # pydocstyle "E", # pycodestyle error "F", # pyflakes diff --git a/scripts/analyze_model_disagreement.py b/scripts/analyze_model_disagreement.py new file mode 100644 index 00000000..c5011623 --- /dev/null +++ b/scripts/analyze_model_disagreement.py @@ -0,0 +1,93 @@ +"""Check if points with large error compared to DFT but little disagreement between +models can pinpoint DFT calculation gone wrong. +""" + + +# %% +import pandas as pd +from crystal_toolkit.helpers.utils import hook_up_fig_with_struct_viewer +from pymatviz.utils import add_identity_line, save_fig + +from matbench_discovery import FIGS +from matbench_discovery.data import DATA_FILES +from matbench_discovery.preds import ( + df_preds, + each_true_col, + model_mean_each_col, + model_mean_err_col, + model_std_col, +) + +__author__ = "Janosh Riebesell" +__date__ = "2023-02-15" + + +# %% scatter plot of largest model errors vs. DFT hull distance +# while some points lie on a horizontal line of constant error, more follow the identity +# line showing models are biased to predict low energies likely as a result of training +# on MP which is highly low-energy enriched. 
+# also possible models failed to learn whatever physics makes these materials highly +# unstable + +material_classes = { + "all": r".*", + "oxides": r".*O\d.*", + "nitrides": r".*N\d.*", + "sulfides": r".*S\d.*", + "halides": r".*[FClBrI]\d.*", + "pnictides": r".*[AsSbBi]\d.*", + "chalcogenides": r".*[SeTe]\d.*", + "borides": r".*B\d.*", + "carbides": r".*C\d.*", + "hydrides": r".*H\d.*", + "oxynitrides": r".*[ON]\d.*", +} +for material_cls, pattern in material_classes.items(): + df_oxides = df_preds[df_preds["formula"].str.match(pattern)] + n_structs = 100 + df_plot = df_oxides.nlargest(n_structs, model_mean_err_col).round(2) + + fig = df_plot.plot.scatter( + x=each_true_col, + y=model_mean_each_col, + color=model_std_col, + size="n_sites", + backend="plotly", + hover_name="material_id", + hover_data=["formula"], + color_continuous_scale="Turbo", + # range_color=[0, df_plot[model_std_col].max()], + ) + # for horizontal colorbar + # yanchor="bottom", y=1, xanchor="center", x=0.5, orientation="h", thickness=12 + fig.layout.coloraxis.colorbar.update(title_side="right", thickness=14) + fig.layout.margin.update(l=0, r=30, b=0, t=60) + add_identity_line(fig) + fig.layout.title.update( + text=f"{n_structs} largest {material_cls} model errors: Predicted vs.
" + "DFT hull distance colored by model disagreement", + x=0.5, + ) + # tried setting error_y=model_std_col but looks bad + # fig.update_traces( + # error_y=dict(color="rgba(255,255,255,0.2)", width=3, thickness=2) + # ) + fig.show() + img_name = f"scatter-largest-errors-models-mean-vs-true-hull-dist-{material_cls}" + save_fig(fig, f"{FIGS}/{img_name}.svelte") + # save_fig(fig, f"{ROOT}/tmp/figs/{img_name}.pdf") + + +# %% +df_cse = pd.read_json(DATA_FILES.wbm_cses_plus_init_structs).set_index("material_id") + + +# %% struct viewer +app = hook_up_fig_with_struct_viewer( + fig, + df_cse, + "initial_structure", + # validate_id requires material_id to be hover_name + validate_id=lambda id: id.startswith(("wbm-", "mp-", "mvc-")), +) +app.run(port=8000) diff --git a/scripts/analyze_model_failure_cases.py b/scripts/analyze_model_failure_cases.py index 290e58dd..6813122e 100644 --- a/scripts/analyze_model_failure_cases.py +++ b/scripts/analyze_model_failure_cases.py @@ -13,7 +13,7 @@ import plotly.graph_objs as go from pymatgen.core import Composition, Structure from pymatviz import count_elements, plot_structure_2d, ptable_heatmap_plotly -from pymatviz.utils import add_identity_line, save_fig +from pymatviz.utils import save_fig from tqdm import tqdm from matbench_discovery import FIGS, ROOT @@ -25,9 +25,7 @@ df_metrics, df_preds, each_true_col, - model_mean_each_col, model_mean_err_col, - model_std_col, ) __author__ = "Janosh Riebesell" @@ -172,44 +170,6 @@ df_preds[n_examp_for_rarest_elem_col] = df_wbm[n_examp_for_rarest_elem_col] -# %% scatter plot of largest model errors vs. DFT hull distance -# while some points lie on a horizontal line of constant error, more follow the identity -# line showing models are biased to predict low energies likely as a result of training -# on MP which is highly low-energy enriched. 
-# also possible models failed to learn whatever physics makes these materials highly -# unstable -n_structs = 200 -fig = ( - df_preds.nlargest(n_structs, model_mean_err_col) - .round(2) - .plot.scatter( - x=each_true_col, - y=model_mean_each_col, - color=model_std_col, - size="n_sites", - backend="plotly", - hover_name="material_id", - hover_data=["formula"], - color_continuous_scale="Turbo", - ) -) -# yanchor="bottom", y=1, xanchor="center", x=0.5, orientation="h", thickness=12 -fig.layout.coloraxis.colorbar.update(title_side="right", thickness=14) -fig.layout.margin.update(l=0, r=30, b=0, t=30) -add_identity_line(fig) -fig.layout.title.update( - text=f"{n_structs} largest model errors: Predicted vs. DFT hull distance
" - "colored by model disagreement", - x=0.5, -) -# tried setting error_y=model_std_col but looks bad -# fig.update_traces(error_y=dict(color="rgba(255,255,255,0.2)", width=3, thickness=2)) -fig.show() -img_name = "scatter-largest-errors-models-mean-vs-true-hull-dist" -save_fig(fig, f"{FIGS}/{img_name}.svelte") -# save_fig(fig, f"{ROOT}/tmp/figs/{img_name}.pdf") - - # %% find materials that were misclassified by all models for model in df_each_pred: true_pos, false_neg, false_pos, true_neg = classify_stable( From 687246c7477f9e0a2eb34ece9e72085e9847a3aa Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Thu, 18 May 2023 15:20:39 -0700 Subject: [PATCH 2/8] update deps --- .pre-commit-config.yaml | 4 ++-- site/package.json | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5ad6a4fd..a63fc9ff 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ default_install_hook_types: [pre-commit, commit-msg] repos: - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.265 + rev: v0.0.269 hooks: - id: ruff args: [--fix] @@ -34,7 +34,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.2.0 + rev: v1.3.0 hooks: - id: mypy additional_dependencies: [types-pyyaml, types-requests] diff --git a/site/package.json b/site/package.json index 42424f03..56169ad6 100644 --- a/site/package.json +++ b/site/package.json @@ -21,8 +21,8 @@ "@sveltejs/adapter-static": "^2.0.2", "@sveltejs/kit": "^1.16.3", "@sveltejs/vite-plugin-svelte": "^2.2.0", - "@typescript-eslint/eslint-plugin": "^5.59.5", - "@typescript-eslint/parser": "^5.59.5", + "@typescript-eslint/eslint-plugin": "^5.59.6", + "@typescript-eslint/parser": "^5.59.6", "elementari": "^0.1.8", "eslint": "^8.40.0", "eslint-plugin-svelte3": "^4.0.0", @@ -38,14 +38,14 @@ "remark-math": "3.0.0", "svelte": "^3.59.1", "svelte-check": "^3.3.2", - "svelte-multiselect": "^8.6.1", + 
"svelte-multiselect": "^8.6.2", "svelte-preprocess": "^5.0.3", "svelte-toc": "^0.5.5", "svelte-zoo": "^0.4.5", "svelte2tsx": "^0.6.14", "tslib": "^2.5.0", "typescript": "5.0.4", - "vite": "^4.3.5" + "vite": "^4.3.7" }, "prettier": { "semi": false, From ce1e60b22fba9986e9a67ad6560e637660bf1210 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Thu, 18 May 2023 15:21:00 -0700 Subject: [PATCH 3/8] add figs/scatter-largest-errors-models-mean-vs-true-hull-dist-*.svelte split by chemistry includes: - all - borides - carbides - chalcogenides - halides - hydrides - nitrides - oxides - oxynitrides - pnictides - sulfides --- ...atter-largest-errors-models-mean-vs-true-hull-dist-all.svelte | 1 + ...r-largest-errors-models-mean-vs-true-hull-dist-borides.svelte | 1 + ...-largest-errors-models-mean-vs-true-hull-dist-carbides.svelte | 1 + ...est-errors-models-mean-vs-true-hull-dist-chalcogenides.svelte | 1 + ...r-largest-errors-models-mean-vs-true-hull-dist-halides.svelte | 1 + ...-largest-errors-models-mean-vs-true-hull-dist-hydrides.svelte | 1 + ...-largest-errors-models-mean-vs-true-hull-dist-nitrides.svelte | 1 + ...er-largest-errors-models-mean-vs-true-hull-dist-oxides.svelte | 1 + ...rgest-errors-models-mean-vs-true-hull-dist-oxynitrides.svelte | 1 + ...largest-errors-models-mean-vs-true-hull-dist-pnictides.svelte | 1 + ...-largest-errors-models-mean-vs-true-hull-dist-sulfides.svelte | 1 + .../scatter-largest-errors-models-mean-vs-true-hull-dist.svelte | 1 - 12 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-all.svelte create mode 100644 site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-borides.svelte create mode 100644 site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-carbides.svelte create mode 100644 site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-chalcogenides.svelte create mode 100644 
site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-halides.svelte create mode 100644 site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-hydrides.svelte create mode 100644 site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-nitrides.svelte create mode 100644 site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxides.svelte create mode 100644 site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxynitrides.svelte create mode 100644 site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-pnictides.svelte create mode 100644 site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-sulfides.svelte delete mode 100644 site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist.svelte diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-all.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-all.svelte new file mode 100644 index 00000000..328773ae --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-all.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-borides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-borides.svelte new file mode 100644 index 00000000..b70d0183 --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-borides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-carbides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-carbides.svelte new file mode 100644 index 00000000..90203362 --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-carbides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-chalcogenides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-chalcogenides.svelte new file mode 100644 index 00000000..ca7dc6b1 --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-chalcogenides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-halides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-halides.svelte new file mode 100644 index 00000000..e1298829 --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-halides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-hydrides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-hydrides.svelte new file mode 100644 index 00000000..dfd57412 --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-hydrides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-nitrides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-nitrides.svelte new file mode 100644 index 00000000..cf241854 --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-nitrides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxides.svelte new file mode 100644 index 00000000..c877913f --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxynitrides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxynitrides.svelte new file mode 100644 index 00000000..3a17db75 --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxynitrides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-pnictides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-pnictides.svelte new file mode 100644 index 00000000..decc4436 --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-pnictides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-sulfides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-sulfides.svelte new file mode 100644 index 00000000..78bb73ee --- /dev/null +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-sulfides.svelte @@ -0,0 +1 @@ +
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist.svelte deleted file mode 100644 index 3e99825a..00000000 --- a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist.svelte +++ /dev/null @@ -1 +0,0 @@ -
From 0f040d26e415f8f73b0347296b76d6b0d70045f9 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Thu, 18 May 2023 15:21:58 -0700 Subject: [PATCH 4/8] add largest-error-scatter-select.svelte and display in /si --- site/src/routes/si/+page.md | 4 +-- .../si/largest-error-scatter-select.svelte | 29 +++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) create mode 100644 site/src/routes/si/largest-error-scatter-select.svelte diff --git a/site/src/routes/si/+page.md b/site/src/routes/si/+page.md index bee6f050..c646fa9b 100644 --- a/site/src/routes/si/+page.md +++ b/site/src/routes/si/+page.md @@ -14,7 +14,7 @@ import HistClfPredHullDistModels from '$figs/hist-clf-pred-hull-dist-models-4x2.svelte' import SpacegroupSunburstWbm from '$figs/spacegroup-sunburst-wbm.svelte' import SpacegroupSunburstWrenformerFailures from '$figs/spacegroup-sunburst-wrenformer-failures.svelte' - import ScatterLargestErrorsModelsMeanVsTrueHullDist from '$figs/scatter-largest-errors-models-mean-vs-true-hull-dist.svelte' + import LargestErrorScatterSelect from './largest-error-scatter-select.svelte' import EAboveHullScatterWrenformerFailures from '$figs/e-above-hull-scatter-wrenformer-failures.svelte' import ProtoCountsWrenformerFailures from '$figs/proto-counts-wrenformer-failures.svelte' import ElementPrevalenceVsError from '$figs/element-prevalence-vs-error.svelte' @@ -99,7 +99,7 @@ Given its strong performance on batch 1, it is possible that given sufficiently ## Largest Errors vs DFT Hull Distance {#if mounted} - + {/if} > @label:fig:scatter-largest-errors-models-mean-vs-true-hull-dist DFT vs predicted hull distance (average over all models) for the 200 largest error structures colored by model disagreement (as measured by standard deviation in hull distance predictions from different models) and sized by number of atoms in the structures. This plot shows that high-error predictions are biased towards predicting too small hull distance. 
This is unsurprising considering MP training data mainly consists of low-energy structure.
diff --git a/site/src/routes/si/largest-error-scatter-select.svelte b/site/src/routes/si/largest-error-scatter-select.svelte new file mode 100644 index 00000000..17c415fc --- /dev/null +++ b/site/src/routes/si/largest-error-scatter-select.svelte @@ -0,0 +1,29 @@ + + + + + + + From fae24bb6fe131cf13cdbabc68832ec70d3f94419 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Thu, 18 May 2023 15:22:23 -0700 Subject: [PATCH 5/8] fix element-errors-ptable-heatmap.svelte default cbar_max was order of magnitude too small --- site/src/routes/models/element-errors-ptable-heatmap.svelte | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/site/src/routes/models/element-errors-ptable-heatmap.svelte b/site/src/routes/models/element-errors-ptable-heatmap.svelte index 7d88d31e..dfe60968 100644 --- a/site/src/routes/models/element-errors-ptable-heatmap.svelte +++ b/site/src/routes/models/element-errors-ptable-heatmap.svelte @@ -12,7 +12,7 @@ export let current_model: string[] = [models[2]] export let manual_cbar_max: boolean = false export let normalized: boolean = true - export let cbar_max: number | null = 0.03 + export let cbar_max: number | null = 0.3 const test_set_std_key = Object.keys(per_elem_errors).find((key) => key.includes(`Test set standard deviation`) @@ -61,7 +61,7 @@ convex hull. 
disabled={!manual_cbar_max} bind:value={cbar_max} min={0.01} - max={0.15} + max={0.7} step={0.001} /> {cbar_max} From de4413d9de76a6a60462ebb42d684c6b2177069f Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Thu, 18 May 2023 15:23:09 -0700 Subject: [PATCH 6/8] acknowledge @pbenner for code testing and bug reporting --- site/src/routes/preprint/+page.md | 4 +++- site/src/routes/preprint/iclr-ml4mat/+page.md | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/site/src/routes/preprint/+page.md b/site/src/routes/preprint/+page.md index a19aaad7..e080ee10 100644 --- a/site/src/routes/preprint/+page.md +++ b/site/src/routes/preprint/+page.md @@ -243,7 +243,9 @@ We welcome further model submissions at Janosh Riebesell acknowledges support from the German Academic Scholarship Foundation ([Studienstiftung](https://wikipedia.org/wiki/Studienstiftung)) and gracious hosting as a visiting affiliate in the groups of Kristin Persson and Anubhav Jain. -We would like to thank Jason Blake Gibson, Shyue Ping Ong, Chi Chen, Ekin Dogus Cubuk, Bowen Deng, Tian Xie and Ryota Tomioka for helpful discussions. We also thank Hai-Chen Wang and co-authors for providing the initial structures for the WBM data set @wang_predicting_2021. +We would like to thank Jason Blake Gibson, Shyue Ping Ong, Chi Chen, Tian Xie, Bowen Deng, Peichen Zhong and Ekin Dogus Cubuk for helpful discussions. We also thank Hai-Chen Wang and co-authors for creating and freely providing the WBM data set @wang_predicting_2021. + +Thanks also to [@pbenner](https://github.com/pbenner) for [finding and reporting many bugs]({repo}/issues?q=is%3Aissue+author%3Apbenner+) in the data loading and caching routines prior to the v1 release. 
## Author Contributions diff --git a/site/src/routes/preprint/iclr-ml4mat/+page.md b/site/src/routes/preprint/iclr-ml4mat/+page.md index ccd3dec9..d146b657 100644 --- a/site/src/routes/preprint/iclr-ml4mat/+page.md +++ b/site/src/routes/preprint/iclr-ml4mat/+page.md @@ -144,7 +144,9 @@ We welcome further model submissions as well as data contributions for version 2 Janosh Riebesell acknowledges support from the German Academic Scholarship Foundation ([Studienstiftung](https://wikipedia.org/wiki/Studienstiftung)) and gracious hosting as a visiting affiliate in the groups of Kristin Persson and Anubhav Jain. -We would like to thank Jason Blake Gibson, Shyue Ping Ong, Chi Chen, Ekin Dogus Cubuk, Bowen Deng, Tian Xie and Ryota Tomioka for helpful discussions. We also thank Hai-Chen Wang and co-authors for providing the initial structures for the WBM data set. +We would like to thank Jason Blake Gibson, Shyue Ping Ong, Chi Chen, Tian Xie, Bowen Deng, Peichen Zhong and Ekin Dogus Cubuk for helpful discussions. We also thank Hai-Chen Wang and co-authors for creating and freely providing the WBM data set. + +Thanks also to [@pbenner](https://github.com/pbenner) for [finding and reporting many bugs]({repo}/issues?q=is%3Aissue+author%3Apbenner+) in the data loading and caching routines prior to the v1 release. 
## Author Contributions From 5632ce0df45ddf534c2d3d1f61d02dee007921c3 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Thu, 18 May 2023 15:45:32 -0700 Subject: [PATCH 7/8] fix eslint --- site/src/app.html | 2 +- site/src/routes/si/largest-error-scatter-select.svelte | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/site/src/app.html b/site/src/app.html index 63df4c54..f812a2b0 100644 --- a/site/src/app.html +++ b/site/src/app.html @@ -15,7 +15,7 @@ - + - {option.split('-').slice(-1)[0].split('.')[0]} + {option.split(`-`).slice(-1)[0].split(`.`)[0]} - {option.split('-').slice(-1)[0].split('.')[0]} + {option.split(`-`).slice(-1)[0].split(`.`)[0]} From bdce43666643bd99fa2d8d4fde156052b943f83d Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Thu, 18 May 2023 15:57:00 -0700 Subject: [PATCH 8/8] revert to 200 scatter points per plot --- scripts/analyze_model_disagreement.py | 7 ++++--- ...largest-errors-models-mean-vs-true-hull-dist-all.svelte | 2 +- ...est-errors-models-mean-vs-true-hull-dist-borides.svelte | 2 +- ...st-errors-models-mean-vs-true-hull-dist-carbides.svelte | 2 +- ...rors-models-mean-vs-true-hull-dist-chalcogenides.svelte | 2 +- ...est-errors-models-mean-vs-true-hull-dist-halides.svelte | 2 +- ...st-errors-models-mean-vs-true-hull-dist-hydrides.svelte | 2 +- ...st-errors-models-mean-vs-true-hull-dist-nitrides.svelte | 2 +- ...gest-errors-models-mean-vs-true-hull-dist-oxides.svelte | 2 +- ...errors-models-mean-vs-true-hull-dist-oxynitrides.svelte | 2 +- ...t-errors-models-mean-vs-true-hull-dist-pnictides.svelte | 2 +- ...st-errors-models-mean-vs-true-hull-dist-sulfides.svelte | 2 +- 12 files changed, 15 insertions(+), 14 deletions(-) diff --git a/scripts/analyze_model_disagreement.py b/scripts/analyze_model_disagreement.py index c5011623..9b295867 100644 --- a/scripts/analyze_model_disagreement.py +++ b/scripts/analyze_model_disagreement.py @@ -42,10 +42,11 @@ "hydrides": r".*H\d.*", "oxynitrides": r".*[ON]\d.*", } 
+n_structs = 200 + for material_cls, pattern in material_classes.items(): - df_oxides = df_preds[df_preds["formula"].str.match(pattern)] - n_structs = 100 - df_plot = df_oxides.nlargest(n_structs, model_mean_err_col).round(2) + df_subset = df_preds[df_preds["formula"].str.match(pattern)] + df_plot = df_subset.nlargest(n_structs, model_mean_err_col).round(2) fig = df_plot.plot.scatter( x=each_true_col, diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-all.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-all.svelte index 328773ae..4861e7bd 100644 --- a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-all.svelte +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-all.svelte @@ -1 +1 @@ -
+
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-borides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-borides.svelte index b70d0183..bbd2ac25 100644 --- a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-borides.svelte +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-borides.svelte @@ -1 +1 @@ -
+
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-carbides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-carbides.svelte index 90203362..93ad41db 100644 --- a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-carbides.svelte +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-carbides.svelte @@ -1 +1 @@ -
+
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-chalcogenides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-chalcogenides.svelte index ca7dc6b1..6ad130c1 100644 --- a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-chalcogenides.svelte +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-chalcogenides.svelte @@ -1 +1 @@ -
+
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-halides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-halides.svelte index e1298829..46c67297 100644 --- a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-halides.svelte +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-halides.svelte @@ -1 +1 @@ -
+
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-hydrides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-hydrides.svelte index dfd57412..4742ad7e 100644 --- a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-hydrides.svelte +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-hydrides.svelte @@ -1 +1 @@ -
+
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-nitrides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-nitrides.svelte index cf241854..6c7e1acc 100644 --- a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-nitrides.svelte +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-nitrides.svelte @@ -1 +1 @@ -
+
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxides.svelte index c877913f..21e4787e 100644 --- a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxides.svelte +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxides.svelte @@ -1 +1 @@ -
+
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxynitrides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxynitrides.svelte index 3a17db75..be6a293f 100644 --- a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxynitrides.svelte +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-oxynitrides.svelte @@ -1 +1 @@ -
+
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-pnictides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-pnictides.svelte index decc4436..91a9fe00 100644 --- a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-pnictides.svelte +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-pnictides.svelte @@ -1 +1 @@ -
+
diff --git a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-sulfides.svelte b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-sulfides.svelte index 78bb73ee..07bdd23a 100644 --- a/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-sulfides.svelte +++ b/site/src/figs/scatter-largest-errors-models-mean-vs-true-hull-dist-sulfides.svelte @@ -1 +1 @@ -
+