diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml index 3c41f408..f8623d98 100644 --- a/.github/workflows/gh-pages.yml +++ b/.github/workflows/gh-pages.yml @@ -4,7 +4,7 @@ on: pull_request: branches: [main] push: - branches: [main, site] + branches: [main] workflow_dispatch: # set permissions of GITHUB_TOKEN to allow deployment to GitHub Pages diff --git a/matbench_discovery/__init__.py b/matbench_discovery/__init__.py index c619b4a3..babc9db2 100644 --- a/matbench_discovery/__init__.py +++ b/matbench_discovery/__init__.py @@ -1,5 +1,6 @@ """Global variables used all across the matbench_discovery package.""" +import json import os import sys from datetime import datetime @@ -18,3 +19,10 @@ timestamp = f"{datetime.now():%Y-%m-%d@%H-%M-%S}" today = timestamp.split("@")[0] + +# load URLs from package.json + +with open(f"{ROOT}/site/package.json") as file: + pkg = json.load(file) + pypi_keys_to_npm = dict(Docs="homepage", Repo="repository", Package="package") + URLs = {key: pkg[val] for key, val in pypi_keys_to_npm.items()} diff --git a/matbench_discovery/data.py b/matbench_discovery/data.py index 6a1b36fc..2aeb1533 100644 --- a/matbench_discovery/data.py +++ b/matbench_discovery/data.py @@ -60,7 +60,7 @@ def load_train_test( Recognized data keys are mp-computed-structure-entries, mp-elemental-ref-energies, mp-energies, mp-patched-phase-diagram, wbm-computed-structure-entries, wbm-initial-structures, wbm-summary. See - https://matbench-discovery.janosh.dev/how-to-use for brief data descriptions. + https://janosh.github.io/matbench-discovery/how-to-contribute for brief data descriptions. Args: data_names (str | list[str], optional): Which parts of the MP/WBM dataset to load. @@ -140,11 +140,11 @@ def load_train_test( PRED_FILENAMES = { "CGCNN": "cgcnn/2022-11-23-test-cgcnn-wbm-IS2RE/cgcnn-ensemble-preds.csv", - "Voronoi RF": "voronoi/2022-11-27-train-test/e-form-preds-IS2RE.csv", + "Voronoi Random Forest": "voronoi/2022-11-27-train-test/e-form-preds-IS2RE.csv", "Wrenformer": "wrenformer/2022-11-15-wrenformer-IS2RE-preds.csv", "MEGNet": "megnet/2022-11-18-megnet-wbm-IS2RE/megnet-e-form-preds.csv", "M3GNet": "m3gnet/2022-10-31-m3gnet-wbm-IS2RE.csv", - "BOWSR MEGNet": "bowsr/2022-11-22-bowsr-megnet-wbm-IS2RE.csv", + "BOWSR MEGNet": "bowsr/2023-01-23-bowsr-megnet-wbm-IS2RE.csv", } diff --git a/matbench_discovery/plots.py b/matbench_discovery/plots.py index 4a1b8a04..dfda5257 100644 --- a/matbench_discovery/plots.py +++ b/matbench_discovery/plots.py @@ -612,7 +612,7 @@ def cumulative_precision_recall( # requires fixing index mismatch due to df sub-sampling above # customdata=dict( # df_cum.reset_index() - # .pivot(index="index", columns="metric")["Voronoi RF above hull pred"] + # .pivot(index="index", columns="metric") # .items() # ), **kwargs, diff --git a/models/bowsr/join_bowsr_results.py b/models/bowsr/join_bowsr_results.py index c277475a..4939d21c 100644 --- a/models/bowsr/join_bowsr_results.py +++ b/models/bowsr/join_bowsr_results.py @@ -57,6 +57,10 @@ ) +# %% remove redundant column after sanity check +df_bowsr = df_bowsr.drop(columns=[f"energy_bowsr_{energy_model}"]) + + # %% pymatviz.density_scatter( x=df_bowsr.e_form_per_atom_bowsr_megnet, @@ -71,5 +75,5 @@ # save energy and formation energy as CSV for fast loading df_bowsr.select_dtypes("number").to_csv(out_path.replace(".json.gz", ".csv")) -# in_path = f"{ROOT}/models/bowsr/2022-11-22-bowsr-megnet-wbm-IS2RE.json.gz" +in_path = f"{ROOT}/models/bowsr/2023-01-23-bowsr-megnet-wbm-IS2RE.json.gz" # df_bowsr = pd.read_json(in_path).set_index("material_id") diff --git a/readme.md b/readme.md index d2c9c031..57d9049a 100644 --- a/readme.md +++ b/readme.md @@ -13,7 +13,7 @@ Matbench Discovery -Matbench Discovery is an [interactive leaderboard](https://matbench-discovery.janosh.dev/figures) and associated [PyPI package](https://pypi.org/project/matbench-discovery) for benchmarking ML energy models on a task designed to closely emulate a real-world computational materials discovery workflow. In it, these models take on the role of a triaging step prior to DFT to decide how to allocate limited compute budget for structure relaxations. +Matbench Discovery is an [interactive leaderboard](https://janosh.github.io/matbench-discovery) and associated [PyPI package](https://pypi.org/project/matbench-discovery) for benchmarking ML energy models on a task designed to closely emulate a real-world computational materials discovery workflow. In it, these models take on the role of a triaging step prior to DFT to decide how to allocate limited compute budget for structure relaxations. We welcome contributions that add new models to the leaderboard through [GitHub PRs](https://github.com/janosh/matbench-discovery/pulls). See the [usage and contributing guide](https://janosh.github.io/matbench-discovery/how-to-contribute) for details. diff --git a/scripts/cumulative_clf_metrics.py b/scripts/cumulative_clf_metrics.py index 92793c9f..a1f99fd3 100644 --- a/scripts/cumulative_clf_metrics.py +++ b/scripts/cumulative_clf_metrics.py @@ -13,7 +13,7 @@ # %% models = ( # Wren, CGCNN IS2RE, CGCNN RS2RE, CGCNN - "Voronoi RF, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet" + "Voronoi Random Forest, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet" ).split(", ") df_wbm = load_df_wbm_with_preds(models=models).round(3) diff --git a/scripts/hist_classified_stable_vs_hull_dist_models.py b/scripts/hist_classified_stable_vs_hull_dist_models.py index 8379c745..3440d669 100644 --- a/scripts/hist_classified_stable_vs_hull_dist_models.py +++ b/scripts/hist_classified_stable_vs_hull_dist_models.py @@ -23,7 +23,7 @@ # %% models = sorted( - "CGCNN, Voronoi RF, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ") + "CGCNN, Voronoi Random Forest, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ") ) df_wbm = load_df_wbm_with_preds(models=models).round(3) diff --git a/scripts/metrics_table.py b/scripts/metrics_table.py index 717d52d0..af88bdcf 100644 --- a/scripts/metrics_table.py +++ b/scripts/metrics_table.py @@ -26,7 +26,7 @@ display_name={"$regex": "cgcnn-robust-formation_energy_per_atom"}, ), ), - "Voronoi RF": dict( + "Voronoi Random Forest": dict( n_runs=68, filters=dict( created_at={"$gt": "2022-11-17", "$lt": "2022-11-28"}, @@ -100,7 +100,7 @@ # on 2022-11-28: -# run_times = {'Voronoi RF': 739608, +# run_times = {'Voronoi Random Forest': 739608, # 'Wrenformer': 208399, # 'MEGNet': 12396, # 'M3GNet': 301138, diff --git a/scripts/rolling_mae_vs_hull_dist_all_models.py b/scripts/rolling_mae_vs_hull_dist_all_models.py index dfe47b97..86888eed 100644 --- a/scripts/rolling_mae_vs_hull_dist_all_models.py +++ b/scripts/rolling_mae_vs_hull_dist_all_models.py @@ -11,7 +11,7 @@ # %% models = sorted( - "Wrenformer, CGCNN, Voronoi RF, MEGNet, M3GNet, BOWSR MEGNet".split(", ") + "Wrenformer, CGCNN, Voronoi Random Forest, MEGNet, M3GNet, BOWSR MEGNet".split(", ") ) e_form_col = "e_form_per_atom_mp2020_corrected" e_above_hull_col = "e_above_hull_mp2020_corrected_ppd_mp" diff --git a/scripts/scatter_e_above_hull_models.py b/scripts/scatter_e_above_hull_models.py index e48a2151..bc3f21b2 100644 --- a/scripts/scatter_e_above_hull_models.py +++ b/scripts/scatter_e_above_hull_models.py @@ -16,7 +16,7 @@ # %% print(f"loadable models: {list(PRED_FILENAMES)}") models = sorted( - "CGCNN, Voronoi RF, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ") + "CGCNN, Voronoi Random Forest, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ") ) df_wbm = load_df_wbm_with_preds(models=models).round(3) diff --git a/site/package.json b/site/package.json index 3c7c6347..c4f8a30f 100644 --- a/site/package.json +++ b/site/package.json @@ -4,6 +4,7 @@ "author": "Janosh Riebesell ", "homepage": "https://janosh.github.io/matbench-discovery", "repository": "https://github.com/janosh/matbench-discovery", + "package": "https://pypi.org/project/matbench-discovery", "license": "MIT", "type": "module", "bugs": "https://github.com/janosh/matbench-discovery/issues", diff --git a/site/src/app.css b/site/src/app.css index 3913a1ce..715093b2 100644 --- a/site/src/app.css +++ b/site/src/app.css @@ -167,3 +167,7 @@ caption { :target { animation: highlight-scroll-target 3s; } + +.pull-left { + margin-left: calc(0.8 * (-50vw + 50cqw)); +} diff --git a/site/src/lib/ModelCard.svelte b/site/src/lib/ModelCard.svelte index 20e7c8b6..5686d78f 100644 --- a/site/src/lib/ModelCard.svelte +++ b/site/src/lib/ModelCard.svelte @@ -1,12 +1,14 @@

{model_name}

@@ -21,45 +23,55 @@

Date added: {new Date(date_added).toISOString().split(`T`)[0]}  •  Benchmark version: {data.matbench_discovery_version} +  •  Missing predictions: + {pretty_num(missing_preds)} + ({((100 * missing_preds) / test_set_size).toFixed(2)}%)

-Authors -
- - Package versions - -
+
+
+

Authors

+
    + {#each data.authors as { name, email, orcid, affiliation, url }} +
  • + {name} + {#if email} + [email] + {/if} + {#if orcid} + [Orcid] + {/if} + {#if url} + [web] + {/if} +
  • + {/each} +
+
+
+

Package versions

+
    + {#each Object.entries(data.requirements) as [name, version]} +
  • + {#if ![`aviary`].includes(name)} + {@const href = `https://pypi.org/project/${name}/${version}`} + {name}: {version} + {:else} + {name}: {version} + {/if} +
  • + {/each} +
+
+
diff --git a/site/src/routes/models/+page.server.ts b/site/src/routes/models/+page.server.ts index 3adb4199..642ff38f 100644 --- a/site/src/routes/models/+page.server.ts +++ b/site/src/routes/models/+page.server.ts @@ -1,13 +1,19 @@ import type { ModelMetadata } from '$lib/types' import { dirname } from 'path' import type { PageServerLoad } from './$types' +import analysis from './2023-01-23-pred-analysis.json' export const load: PageServerLoad = async () => { - const models: [string, ModelMetadata][] = Object.entries( - import.meta.glob(`$root/models/**/metadata.yml`, { - eager: true, - }) - ).map(([key, module]) => [dirname(key), module.default]) + const yml = import.meta.glob(`$root/models/**/metadata.yml`, { + eager: true, + }) + const models: [string, ModelMetadata][] = Object.entries(yml).map( + ([key, module]) => { + const metadata = module.default as ModelMetadata + const computed = analysis[metadata.model_name] ?? {} + return [dirname(key), { ...metadata, ...computed }] + } + ) return { models } } diff --git a/site/src/routes/models/+page.svelte b/site/src/routes/models/+page.svelte index ed9648c3..7a947a20 100644 --- a/site/src/routes/models/+page.svelte +++ b/site/src/routes/models/+page.svelte @@ -5,10 +5,10 @@ export let data: PageData -

Models

+

Models

-
    - {#each data.models as [key, metadata], idx} +
      + {#each data.models as [key, metadata]}
    1. @@ -20,9 +20,11 @@ display: grid; gap: 2em; list-style: none; + grid-template-columns: repeat(auto-fit, minmax(400px, 1fr)); } ol > li { background-color: rgba(255, 255, 255, 0.05); - padding: 3pt 9pt 5pt; + padding: 3pt 10pt 7pt; + border-radius: 3pt; } diff --git a/site/src/routes/paper/+page.svx b/site/src/routes/paper/+page.svx index a7e70631..e1b033a9 100644 --- a/site/src/routes/paper/+page.svx +++ b/site/src/routes/paper/+page.svx @@ -49,8 +49,6 @@ date: Jan 31, 2023 import { References } from '$lib' import './heading-number.css' // CSS to auto-number headings import CumulativeClfMetrics from '$figs/2023-01-19-cumulative-clf-metrics.svelte' - - const style = "margin-left: calc(0.8 * (-50vw + 50cqw));" # {title}
      {subtitle} @@ -193,7 +191,7 @@ Our benchmark is designed to make [adding future models easy](/how-to-contribute
      {#if typeof document !== `undefined`} - + {/if}
      diff --git a/tests/test_data.py b/tests/test_data.py index f7b269cb..d49ff9e5 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -111,8 +111,11 @@ def test_load_train_test_doc_str() -> None: for name in DATA_FILENAMES: assert name in doc_str, f"Missing data {name=} in load_train_test() docstring" - # TODO refactor to load site URL from site/package.json for SSoT - assert "https://matbench-discovery.janosh.dev" in doc_str + route = "/how-to-contribute" + from matbench_discovery import URLs + + assert f"{URLs['Docs']}{route}" in doc_str + assert os.path.isdir(f"{ROOT}/site/src/routes/{route}") @pytest.mark.skipif(website_down, reason=f"{RAW_REPO_URL} unreachable") diff --git a/tests/test_plots.py b/tests/test_plots.py index ad95cd78..819fe1f0 100644 --- a/tests/test_plots.py +++ b/tests/test_plots.py @@ -15,7 +15,7 @@ rolling_mae_vs_hull_dist, ) -models = ["Wrenformer", "CGCNN", "Voronoi RF"] +models = ["Wrenformer", "CGCNN", "Voronoi Random Forest"] df_wbm = load_df_wbm_with_preds(models=models, nrows=100) e_above_hull_col = "e_above_hull_mp2020_corrected_ppd_mp" e_form_col = "e_form_per_atom_mp2020_corrected"