Skip to content

Commit

Permalink
SSoT (single source of truth) for project URLs in site/package.json
Browse files Browse the repository at this point in the history
add site/src/routes/models/analyze_preds.py to display missing preds in ModelCard
  • Loading branch information
janosh committed Jun 20, 2023
1 parent 2f6fae3 commit a4b37a3
Show file tree
Hide file tree
Showing 19 changed files with 107 additions and 64 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/gh-pages.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ on:
pull_request:
branches: [main]
push:
branches: [main, site]
branches: [main]
workflow_dispatch:

# set permissions of GITHUB_TOKEN to allow deployment to GitHub Pages
Expand Down
8 changes: 8 additions & 0 deletions matbench_discovery/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Global variables used all across the matbench_discovery package."""

import json
import os
import sys
from datetime import datetime
Expand All @@ -18,3 +19,10 @@

timestamp = f"{datetime.now():%Y-%m-%d@%H-%M-%S}"
today = timestamp.split("@")[0]

# load project URLs (docs, repo, PyPI package) from site/package.json so they are defined in a single place

with open(f"{ROOT}/site/package.json") as file:
pkg = json.load(file)
pypi_keys_to_npm = dict(Docs="homepage", Repo="repository", Package="package")
URLs = {key: pkg[val] for key, val in pypi_keys_to_npm.items()}
6 changes: 3 additions & 3 deletions matbench_discovery/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def load_train_test(
Recognized data keys are mp-computed-structure-entries, mp-elemental-ref-energies,
mp-energies, mp-patched-phase-diagram, wbm-computed-structure-entries,
wbm-initial-structures, wbm-summary. See
https://matbench-discovery.janosh.dev/how-to-use for brief data descriptions.
https://janosh.github.io/matbench-discovery/how-to-contribute for brief data descriptions.
Args:
data_names (str | list[str], optional): Which parts of the MP/WBM dataset to load.
Expand Down Expand Up @@ -140,11 +140,11 @@ def load_train_test(

PRED_FILENAMES = {
"CGCNN": "cgcnn/2022-11-23-test-cgcnn-wbm-IS2RE/cgcnn-ensemble-preds.csv",
"Voronoi RF": "voronoi/2022-11-27-train-test/e-form-preds-IS2RE.csv",
"Voronoi Random Forest": "voronoi/2022-11-27-train-test/e-form-preds-IS2RE.csv",
"Wrenformer": "wrenformer/2022-11-15-wrenformer-IS2RE-preds.csv",
"MEGNet": "megnet/2022-11-18-megnet-wbm-IS2RE/megnet-e-form-preds.csv",
"M3GNet": "m3gnet/2022-10-31-m3gnet-wbm-IS2RE.csv",
"BOWSR MEGNet": "bowsr/2022-11-22-bowsr-megnet-wbm-IS2RE.csv",
"BOWSR MEGNet": "bowsr/2023-01-23-bowsr-megnet-wbm-IS2RE.csv",
}


Expand Down
2 changes: 1 addition & 1 deletion matbench_discovery/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,7 +612,7 @@ def cumulative_precision_recall(
# requires fixing index mismatch due to df sub-sampling above
# customdata=dict(
# df_cum.reset_index()
# .pivot(index="index", columns="metric")["Voronoi RF above hull pred"]
# .pivot(index="index", columns="metric")
# .items()
# ),
**kwargs,
Expand Down
6 changes: 5 additions & 1 deletion models/bowsr/join_bowsr_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@
)


# %% remove redundant column after sanity check
df_bowsr = df_bowsr.drop(columns=[f"energy_bowsr_{energy_model}"])


# %%
pymatviz.density_scatter(
x=df_bowsr.e_form_per_atom_bowsr_megnet,
Expand All @@ -71,5 +75,5 @@
# save energy and formation energy as CSV for fast loading
df_bowsr.select_dtypes("number").to_csv(out_path.replace(".json.gz", ".csv"))

# in_path = f"{ROOT}/models/bowsr/2022-11-22-bowsr-megnet-wbm-IS2RE.json.gz"
in_path = f"{ROOT}/models/bowsr/2023-01-23-bowsr-megnet-wbm-IS2RE.json.gz"
# df_bowsr = pd.read_json(in_path).set_index("material_id")
2 changes: 1 addition & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ Matbench Discovery

</h4>

Matbench Discovery is an [interactive leaderboard](https://matbench-discovery.janosh.dev/figures) and associated [PyPI package](https://pypi.org/project/matbench-discovery) for benchmarking ML energy models on a task designed to closely emulate a real-world computational materials discovery workflow. In it, these models take on the role of a triaging step prior to DFT to decide how to allocate limited compute budget for structure relaxations.
Matbench Discovery is an [interactive leaderboard](https://janosh.github.io/matbench-discovery) and associated [PyPI package](https://pypi.org/project/matbench-discovery) for benchmarking ML energy models on a task designed to closely emulate a real-world computational materials discovery workflow. In it, these models take on the role of a triaging step prior to DFT to decide how to allocate limited compute budget for structure relaxations.

We welcome contributions that add new models to the leaderboard through [GitHub PRs](https://github.com/janosh/matbench-discovery/pulls). See the [usage and contributing guide](https://janosh.github.io/matbench-discovery/how-to-contribute) for details.

Expand Down
2 changes: 1 addition & 1 deletion scripts/cumulative_clf_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# %%
models = (
# Wren, CGCNN IS2RE, CGCNN RS2RE, CGCNN
"Voronoi RF, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet"
"Voronoi Random Forest, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet"
).split(", ")

df_wbm = load_df_wbm_with_preds(models=models).round(3)
Expand Down
2 changes: 1 addition & 1 deletion scripts/hist_classified_stable_vs_hull_dist_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

# %%
models = sorted(
"CGCNN, Voronoi RF, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ")
"CGCNN, Voronoi Random Forest, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ")
)
df_wbm = load_df_wbm_with_preds(models=models).round(3)

Expand Down
4 changes: 2 additions & 2 deletions scripts/metrics_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
display_name={"$regex": "cgcnn-robust-formation_energy_per_atom"},
),
),
"Voronoi RF": dict(
"Voronoi Random Forest": dict(
n_runs=68,
filters=dict(
created_at={"$gt": "2022-11-17", "$lt": "2022-11-28"},
Expand Down Expand Up @@ -100,7 +100,7 @@


# on 2022-11-28:
# run_times = {'Voronoi RF': 739608,
# run_times = {'Voronoi Random Forest': 739608,
# 'Wrenformer': 208399,
# 'MEGNet': 12396,
# 'M3GNet': 301138,
Expand Down
2 changes: 1 addition & 1 deletion scripts/rolling_mae_vs_hull_dist_all_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

# %%
models = sorted(
"Wrenformer, CGCNN, Voronoi RF, MEGNet, M3GNet, BOWSR MEGNet".split(", ")
"Wrenformer, CGCNN, Voronoi Random Forest, MEGNet, M3GNet, BOWSR MEGNet".split(", ")
)
e_form_col = "e_form_per_atom_mp2020_corrected"
e_above_hull_col = "e_above_hull_mp2020_corrected_ppd_mp"
Expand Down
2 changes: 1 addition & 1 deletion scripts/scatter_e_above_hull_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
# %%
print(f"loadable models: {list(PRED_FILENAMES)}")
models = sorted(
"CGCNN, Voronoi RF, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ")
"CGCNN, Voronoi Random Forest, Wrenformer, MEGNet, M3GNet, BOWSR MEGNet".split(", ")
)
df_wbm = load_df_wbm_with_preds(models=models).round(3)

Expand Down
1 change: 1 addition & 0 deletions site/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"author": "Janosh Riebesell <janosh.riebesell@gmail.com>",
"homepage": "https://janosh.github.io/matbench-discovery",
"repository": "https://github.com/janosh/matbench-discovery",
"package": "https://pypi.org/project/matbench-discovery",
"license": "MIT",
"type": "module",
"bugs": "https://github.com/janosh/matbench-discovery/issues",
Expand Down
4 changes: 4 additions & 0 deletions site/src/app.css
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,7 @@ caption {
:target {
animation: highlight-scroll-target 3s;
}

.pull-left {
margin-left: calc(0.8 * (-50vw + 50cqw));
}
89 changes: 53 additions & 36 deletions site/src/lib/ModelCard.svelte
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
<script lang="ts">
import { repository } from '$site/package.json'
import Icon from '@iconify/svelte'
import { pretty_num } from 'sveriodic-table/labels'
import type { ModelMetadata } from './types'
export let key: string
export let data: ModelMetadata
const { model_name, repo, doi, preprint, url, date_added } = data
const { missing_preds, test_set_size } = data
</script>

<h2>{model_name}</h2>
Expand All @@ -21,45 +23,55 @@
<p>
Date added: {new Date(date_added).toISOString().split(`T`)[0]}
&nbsp;&bull;&nbsp; Benchmark version: {data.matbench_discovery_version}
&nbsp;&bull;&nbsp; Missing predictions:
{pretty_num(missing_preds)}
<small>({((100 * missing_preds) / test_set_size).toFixed(2)}%)</small>
</p>
<strong>Authors</strong>
<section>
<ul>
{#each data.authors as { name, email, orcid, affiliation, url }}
<li>
<span title={affiliation}>{name}</span>
{#if email}
[<a href="mailto:{email}">email</a>]
{/if}
{#if orcid}
[<a href={orcid}>Orcid</a>]
{/if}
{#if url}
[<a href={url}>web</a>]
{/if}
</li>
{/each}
</ul>
<strong>Package versions</strong>
<ul>
{#each Object.entries(data.requirements) as [name, version]}
<li>
{#if ![`aviary`].includes(name)}
{@const href = `https://pypi.org/project/${name}/${version}`}
{name}: <a {href}>{version}</a>
{:else}
{name}: {version}
{/if}
</li>
{/each}
</ul>
</section>
<div>
<section>
<h3>Authors</h3>
<ul>
{#each data.authors as { name, email, orcid, affiliation, url }}
<li>
<span title={affiliation}>{name}</span>
{#if email}
[<a href="mailto:{email}">email</a>]
{/if}
{#if orcid}
[<a href={orcid}>Orcid</a>]
{/if}
{#if url}
[<a href={url}>web</a>]
{/if}
</li>
{/each}
</ul>
</section>
<section>
<h3>Package versions</h3>
<ul>
{#each Object.entries(data.requirements) as [name, version]}
<li>
{#if ![`aviary`].includes(name)}
{@const href = `https://pypi.org/project/${name}/${version}`}
{name}: <a {href}>{version}</a>
{:else}
{name}: {version}
{/if}
</li>
{/each}
</ul>
</section>
</div>

<!-- TODO add table with performance metrics (F1, Acc, Recall, Precision) for each model -->
<style>
h2 {
margin: 5pt 0 1ex;
}
h3 {
margin: 0;
}
ul {
list-style-type: disc;
}
Expand All @@ -71,11 +83,16 @@
}
nav > span {
display: flex;
gap: 0.5em;
gap: 6pt;
place-items: center;
}
strong {
display: block;
margin: 1em 0 5pt;
div {
display: flex;
gap: 15pt;
margin: 1em 0;
justify-content: space-between;
}
small {
font-weight: lighter;
}
</style>
16 changes: 11 additions & 5 deletions site/src/routes/models/+page.server.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
import type { ModelMetadata } from '$lib/types'
import { dirname } from 'path'
import type { PageServerLoad } from './$types'
import analysis from './2023-01-23-pred-analysis.json'

export const load: PageServerLoad = async () => {
const models: [string, ModelMetadata][] = Object.entries(
import.meta.glob(`$root/models/**/metadata.yml`, {
eager: true,
})
).map(([key, module]) => [dirname(key), module.default])
const yml = import.meta.glob(`$root/models/**/metadata.yml`, {
eager: true,
})
const models: [string, ModelMetadata][] = Object.entries(yml).map(
([key, module]) => {
const metadata = module.default as ModelMetadata
const computed = analysis[metadata.model_name] ?? {}
return [dirname(key), { ...metadata, ...computed }]
}
)

return { models }
}
10 changes: 6 additions & 4 deletions site/src/routes/models/+page.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
export let data: PageData
</script>

<h1>Models</h1>
<h1 class="pull-left">Models</h1>

<ol>
{#each data.models as [key, metadata], idx}
<ol class="pull-left">
{#each data.models as [key, metadata]}
<li>
<ModelCard {key} data={metadata} />
</li>
Expand All @@ -20,9 +20,11 @@
display: grid;
gap: 2em;
list-style: none;
grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
}
ol > li {
background-color: rgba(255, 255, 255, 0.05);
padding: 3pt 9pt 5pt;
padding: 3pt 10pt 7pt;
border-radius: 3pt;
}
</style>
4 changes: 1 addition & 3 deletions site/src/routes/paper/+page.svx
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@ date: Jan 31, 2023
import { References } from '$lib'
import './heading-number.css' // CSS to auto-number headings
import CumulativeClfMetrics from '$figs/2023-01-19-cumulative-clf-metrics.svelte'

const style = "margin-left: calc(0.8 * (-50vw + 50cqw));"
</script>

# {title}<br><small>{subtitle}</small>
Expand Down Expand Up @@ -193,7 +191,7 @@ Our benchmark is designed to make [adding future models easy](/how-to-contribute

<div>
{#if typeof document !== `undefined`}
<CumulativeClfMetrics {style} />
<CumulativeClfMetrics class="pull-left" />
{/if}
</div>

Expand Down
7 changes: 5 additions & 2 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,11 @@ def test_load_train_test_doc_str() -> None:
for name in DATA_FILENAMES:
assert name in doc_str, f"Missing data {name=} in load_train_test() docstring"

# TODO refactor to load site URL from site/package.json for SSoT
assert "https://matbench-discovery.janosh.dev" in doc_str
route = "/how-to-contribute"
from matbench_discovery import URLs

assert f"{URLs['Docs']}{route}" in doc_str
assert os.path.isdir(f"{ROOT}/site/src/routes/{route}")


@pytest.mark.skipif(website_down, reason=f"{RAW_REPO_URL} unreachable")
Expand Down
2 changes: 1 addition & 1 deletion tests/test_plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
rolling_mae_vs_hull_dist,
)

models = ["Wrenformer", "CGCNN", "Voronoi RF"]
models = ["Wrenformer", "CGCNN", "Voronoi Random Forest"]
df_wbm = load_df_wbm_with_preds(models=models, nrows=100)
e_above_hull_col = "e_above_hull_mp2020_corrected_ppd_mp"
e_form_col = "e_form_per_atom_mp2020_corrected"
Expand Down

0 comments on commit a4b37a3

Please sign in to comment.