Skip to content

Commit

Permalink
remove df_to_pdf and normalize_and_crop_pdf from matbench_discovery/plots.py
Browse files Browse the repository at this point in the history

now imported from pymatviz
  • Loading branch information
janosh committed Oct 10, 2023
1 parent 6156c27 commit 071174a
Show file tree
Hide file tree
Showing 20 changed files with 34 additions and 208 deletions.
2 changes: 1 addition & 1 deletion data/wbm/eda.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
ptable_heatmap_plotly,
spacegroup_sunburst,
)
from pymatviz.utils import save_fig
from pymatviz.io import save_fig

from matbench_discovery import PDF_FIGS, ROOT, SITE_FIGS, STABILITY_THRESHOLD
from matbench_discovery import plots as plots
Expand Down
2 changes: 1 addition & 1 deletion data/wbm/fetch_process_wbm_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
)
from pymatgen.entries.computed_entries import ComputedStructureEntry
from pymatviz import density_scatter
from pymatviz.utils import save_fig
from pymatviz.io import save_fig
from tqdm import tqdm

from matbench_discovery import SITE_FIGS, today
Expand Down
83 changes: 0 additions & 83 deletions matbench_discovery/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

import functools
import math
import os
import subprocess
from collections import defaultdict
from collections.abc import Sequence
from pathlib import Path
Expand Down Expand Up @@ -923,84 +921,3 @@ def df_to_svelte_table(
styled_table = html_table.replace("</style>", f"{styles}</style>")
with open(file_path, "w") as file:
file.write(styled_table)


def df_to_pdf(
    styler: Styler, file_path: str | Path, crop: bool = True, **kwargs: Any
) -> None:
    """Render a pandas Styler to a landscape PDF using WeasyPrint.

    Args:
        styler (Styler): Styler object whose HTML rendering is exported.
        file_path (str | Path): Path to save the PDF to. Requires WeasyPrint.
        crop (bool): Whether to crop the PDF margins afterwards by calling
            normalize_and_crop_pdf. Requires pdfCropMargins. Defaults to True.
        **kwargs: Keyword arguments passed to Styler.to_html().

    Raises:
        ImportError: If weasyprint is not installed.
    """
    try:
        from weasyprint import HTML
    except ImportError as exc:
        msg = "weasyprint not installed\nrun pip install weasyprint"
        raise ImportError(msg) from exc

    table_html = styler.to_html(**kwargs)

    # prepend CSS that sets landscape page size and adjusts margins/padding
    page_html = f"""
<style>
@page {{ size: landscape; margin: 1cm; }}
body {{ margin: 0; padding: 1em; }}
</style>
{table_html}
"""

    HTML(string=page_html).write_pdf(file_path)

    if not crop:
        return
    normalize_and_crop_pdf(file_path)


def normalize_and_crop_pdf(file_path: str | Path) -> None:
"""Normalize a PDF using Ghostscript and then crop it.
Without gs normalization, pdfCropMargins sometimes corrupts the PDF.
Args:
file_path (str | Path): Path to the PDF file.
"""
try:
normalized_file_path = f"{file_path}_normalized.pdf"
from pdfCropMargins import crop

# Normalize the PDF with Ghostscript
subprocess.run(
[
*"gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4".split(),
*"-dPDFSETTINGS=/default -dNOPAUSE -dQUIET -dBATCH".split(),
f"-sOutputFile={normalized_file_path}",
str(file_path),
],
check=True,
)

# Crop the normalized PDF
cropped_file_path, exit_code, stdout, stderr = crop(
["--percentRetain", "0", normalized_file_path]
)

if stderr:
print(f"pdfCropMargins {stderr=}")
# something went wrong, remove the cropped PDF
os.remove(cropped_file_path)
else:
# replace the original PDF with the cropped one
os.replace(cropped_file_path, str(file_path))

os.remove(normalized_file_path)

except ImportError as exc:
msg = "pdfCropMargins not installed\nrun pip install pdfCropMargins"
raise ImportError(msg) from exc
except Exception as exc:
raise RuntimeError("Error cropping PDF margins") from exc
2 changes: 1 addition & 1 deletion models/chgnet/analyze_chgnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import pandas as pd
from pymatgen.core import Structure
from pymatviz import density_scatter, plot_structure_2d, ptable_heatmap_plotly
from pymatviz.utils import save_fig
from pymatviz.io import save_fig

from matbench_discovery import PDF_FIGS
from matbench_discovery import plots as plots
Expand Down
2 changes: 1 addition & 1 deletion models/mace/analyze_mace.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import pandas as pd
from pymatviz import density_scatter, ptable_heatmap_plotly, spacegroup_sunburst
from pymatviz.utils import save_fig
from pymatviz.io import save_fig

from matbench_discovery import plots as plots
from matbench_discovery.data import df_wbm
Expand Down
3 changes: 2 additions & 1 deletion models/wrenformer/analyze_wrenformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@
import pandas as pd
from aviary.wren.utils import get_isopointal_proto_from_aflow
from pymatviz import spacegroup_hist, spacegroup_sunburst
from pymatviz.io import df_to_pdf
from pymatviz.ptable import ptable_heatmap_plotly
from pymatviz.utils import add_identity_line, bin_df_cols, save_fig

from matbench_discovery import PDF_FIGS, SITE_FIGS
from matbench_discovery.data import DATA_FILES, df_wbm
from matbench_discovery.plots import df_to_pdf, df_to_svelte_table
from matbench_discovery.plots import df_to_svelte_table
from matbench_discovery.preds import df_each_pred, df_preds, each_true_col

__author__ = "Janosh Riebesell"
Expand Down
10 changes: 3 additions & 7 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,10 @@ requires-python = ">=3.9"
dependencies = [
"matplotlib",
"numpy",
# tmp: shouldn't be needed, used to be included in output_formatting
"jinja2",
# output_formatting needed for pandas Stylers
# see https://github.com/pandas-dev/pandas/blob/-/pyproject.toml
"pandas[output_formatting]>=2.0.0",
"pandas>=2.0.0",
"plotly",
"pymatgen",
"pymatviz[export-figs]",
"pymatviz[export-figs,df-pdf-export]",
"scikit-learn",
"scipy",
"tqdm",
Expand Down Expand Up @@ -69,8 +65,8 @@ running-models = [
"megnet",
]
3d-structures = ["crystaltoolkit"]
df-to-pdf = ["jinja2"]
fetch-data = ["gdown"]
df-pdf-export = ["pdfCropMargins", "weasyprint"]

[tool.setuptools.packages.find]
include = ["matbench_discovery*"]
Expand Down
2 changes: 1 addition & 1 deletion scripts/analyze_model_failure_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import plotly.graph_objs as go
from pymatgen.core import Composition, Structure
from pymatviz import count_elements, plot_structure_2d, ptable_heatmap_plotly
from pymatviz.utils import save_fig
from pymatviz.io import save_fig
from tqdm import tqdm

from matbench_discovery import PDF_FIGS, ROOT, SITE_FIGS
Expand Down
2 changes: 1 addition & 1 deletion scripts/hist_classified_stable_vs_hull_dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
# %%
from typing import Final

from pymatviz.utils import save_fig
from pymatviz.io import save_fig

from matbench_discovery import PDF_FIGS
from matbench_discovery.data import df_wbm
Expand Down
2 changes: 1 addition & 1 deletion scripts/hist_classified_stable_vs_hull_dist_batches.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from typing import Final

import pandas as pd
from pymatviz.utils import save_fig
from pymatviz.io import save_fig

from matbench_discovery import PDF_FIGS
from matbench_discovery.plots import hist_classified_stable_vs_hull_dist
Expand Down
2 changes: 1 addition & 1 deletion scripts/model_figs/cumulative_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

# %%
import pandas as pd
from pymatviz.utils import save_fig
from pymatviz.io import save_fig

from matbench_discovery import PDF_FIGS, SITE_FIGS
from matbench_discovery.plots import cumulative_metrics
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import math
from typing import Final

from pymatviz.utils import save_fig
from pymatviz.io import save_fig

from matbench_discovery import PDF_FIGS, SITE_FIGS, today
from matbench_discovery.plots import hist_classified_stable_vs_hull_dist, plt
Expand Down
13 changes: 7 additions & 6 deletions scripts/model_figs/make_metrics_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,14 @@

import numpy as np
import pandas as pd
from pymatviz.io import df_to_pdf
from sklearn.dummy import DummyClassifier

from matbench_discovery import PDF_FIGS, SITE_FIGS
from matbench_discovery.data import DATA_FILES, df_wbm
from matbench_discovery.metrics import stable_metrics
from matbench_discovery.models import MODEL_METADATA
from matbench_discovery.plots import df_to_pdf, df_to_svelte_table
from matbench_discovery.plots import df_to_svelte_table
from matbench_discovery.preds import df_metrics, df_metrics_10k, each_true_col

__author__ = "Janosh Riebesell"
Expand All @@ -25,7 +26,7 @@
"M3GNet→MEGNet": "M3GNet",
"CHGNet→MEGNet": "CHGNet",
}
train_size_col = "training size"
train_size_col = "Training Size"
df_metrics.loc[train_size_col] = df_metrics_10k.loc[train_size_col] = ""
for model in df_metrics:
model_name = name_map.get(model, model)
Expand Down Expand Up @@ -62,7 +63,7 @@
df_metrics_10k["Dummy"] = dummy_metrics


# %% for each model this ontology dict specifies (training type, test type, model class)
# %% for each model this ontology dict specifies (training type, test type, model type)
ontology = {
"ALIGNN": ("RS2RE", "IS2RE", "GNN"),
# "ALIGNN Pretrained": ("RS2RE", "IS2RE", "GNN"),
Expand All @@ -80,7 +81,7 @@
"CHGNet→MEGNet": ("S2EFSM", "IS2RE-SR", "UIP-GNN"),
"Dummy": ("", "", ""),
}
ontology_cols = ["Trained", "Deployed", "Model Class"]
ontology_cols = ["Trained", "Deployed", model_type_col := "Model Type"]
df_ont = pd.DataFrame(ontology, index=ontology_cols)
# RS2RE = relaxed structure to relaxed energy
# RP2RE = relaxed prototype to predicted energy
Expand All @@ -104,7 +105,7 @@
make_uip_megnet_comparison = False
show_cols = (
f"F1,DAF,Precision,Accuracy,TPR,TNR,MAE,RMSE,{R2_col},"
"training size,Model Class".split(",")
f"{train_size_col},{model_type_col}".split(",")
)

for label, df in (("-first-10k", df_metrics_10k), ("", df_metrics)):
Expand Down Expand Up @@ -160,7 +161,7 @@
)
try:
df_to_pdf(styler, f"{PDF_FIGS}/metrics-table{label}.pdf")
except ImportError as exc:
except (ImportError, RuntimeError) as exc:
print(f"df_to_pdf failed: {exc}")


Expand Down
2 changes: 1 addition & 1 deletion scripts/model_figs/model_run_times.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import requests
import wandb
import wandb.apis.public
from pymatviz.utils import save_fig
from pymatviz.io import save_fig
from tqdm import tqdm

from matbench_discovery import PDF_FIGS, SITE_FIGS, SITE_MODELS, WANDB_PATH
Expand Down
2 changes: 1 addition & 1 deletion scripts/model_figs/roc_prc_curves_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import math

import pandas as pd
from pymatviz.utils import save_fig
from pymatviz.io import save_fig
from sklearn.metrics import auc, precision_recall_curve, roc_curve
from tqdm import tqdm

Expand Down
2 changes: 1 addition & 1 deletion scripts/model_figs/rolling_mae_vs_hull_dist_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import numpy as np
import plotly.graph_objects as go
from pymatviz.utils import save_fig
from pymatviz.io import save_fig

from matbench_discovery import PDF_FIGS, SITE_FIGS
from matbench_discovery.plots import rolling_mae_vs_hull_dist
Expand Down
2 changes: 1 addition & 1 deletion scripts/rolling_mae_vs_hull_dist.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


# %%
from pymatviz.utils import save_fig
from pymatviz.io import save_fig

from matbench_discovery import PDF_FIGS, today
from matbench_discovery.plots import rolling_mae_vs_hull_dist
Expand Down
2 changes: 1 addition & 1 deletion scripts/rolling_mae_vs_hull_dist_wbm_batches.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


# %%
from pymatviz.utils import save_fig
from pymatviz.io import save_fig

from matbench_discovery import PDF_FIGS, SITE_FIGS, today
from matbench_discovery.plots import plt, rolling_mae_vs_hull_dist
Expand Down
Loading

0 comments on commit 071174a

Please sign in to comment.