remove df_to_pdf and normalize_and_crop_pdf from matbench_discovery/plots.py, now imported from pymatviz
janosh · Oct 10, 2023 · 071174a · 071174a
1 parent 6156c27
commit 071174a
Show file tree

Hide file tree

Showing 20 changed files with 34 additions and 208 deletions.
diff --git a/data/wbm/eda.py b/data/wbm/eda.py
@@ -11,7 +11,7 @@
     ptable_heatmap_plotly,
     spacegroup_sunburst,
 )
-from pymatviz.utils import save_fig
+from pymatviz.io import save_fig
 
 from matbench_discovery import PDF_FIGS, ROOT, SITE_FIGS, STABILITY_THRESHOLD
 from matbench_discovery import plots as plots

diff --git a/data/wbm/fetch_process_wbm_dataset.py b/data/wbm/fetch_process_wbm_dataset.py
@@ -15,7 +15,7 @@
 )
 from pymatgen.entries.computed_entries import ComputedStructureEntry
 from pymatviz import density_scatter
-from pymatviz.utils import save_fig
+from pymatviz.io import save_fig
 from tqdm import tqdm
 
 from matbench_discovery import SITE_FIGS, today

diff --git a/matbench_discovery/plots.py b/matbench_discovery/plots.py
@@ -4,8 +4,6 @@
 
 import functools
 import math
-import os
-import subprocess
 from collections import defaultdict
 from collections.abc import Sequence
 from pathlib import Path
@@ -923,84 +921,3 @@ def df_to_svelte_table(
     styled_table = html_table.replace("</style>", f"{styles}</style>")
     with open(file_path, "w") as file:
         file.write(styled_table)
-
-
-def df_to_pdf(
-    styler: Styler, file_path: str | Path, crop: bool = True, **kwargs: Any
-) -> None:
-    """Export a pandas Styler to PDF with WeasyPrint.
-
-    Args:
-        styler (Styler): Styler object to export.
-        file_path (str): Path to save the PDF to. Requires WeasyPrint.
-        crop (bool): Whether to crop the PDF margins. Requires pdfCropMargins.
-            Defaults to True.
-        **kwargs: Keyword arguments passed to Styler.to_html().
-    """
-    try:
-        from weasyprint import HTML
-    except ImportError as exc:
-        msg = "weasyprint not installed\nrun pip install weasyprint"
-        raise ImportError(msg) from exc
-
-    html_str = styler.to_html(**kwargs)
-
-    # CSS to adjust layout and margins
-    html_str = f"""
-    <style>
-        @page {{ size: landscape; margin: 1cm; }}
-        body {{ margin: 0; padding: 1em; }}
-    </style>
-    {html_str}
-    """
-
-    html = HTML(string=html_str)
-
-    html.write_pdf(file_path)
-
-    if crop:
-        normalize_and_crop_pdf(file_path)
-
-
-def normalize_and_crop_pdf(file_path: str | Path) -> None:
-    """Normalize a PDF using Ghostscript and then crop it.
-    Without gs normalization, pdfCropMargins sometimes corrupts the PDF.
-
-    Args:
-        file_path (str | Path): Path to the PDF file.
-    """
-    try:
-        normalized_file_path = f"{file_path}_normalized.pdf"
-        from pdfCropMargins import crop
-
-        # Normalize the PDF with Ghostscript
-        subprocess.run(
-            [
-                *"gs -sDEVICE=pdfwrite -dCompatibilityLevel=1.4".split(),
-                *"-dPDFSETTINGS=/default -dNOPAUSE -dQUIET -dBATCH".split(),
-                f"-sOutputFile={normalized_file_path}",
-                str(file_path),
-            ],
-            check=True,
-        )
-
-        # Crop the normalized PDF
-        cropped_file_path, exit_code, stdout, stderr = crop(
-            ["--percentRetain", "0", normalized_file_path]
-        )
-
-        if stderr:
-            print(f"pdfCropMargins {stderr=}")
-            # something went wrong, remove the cropped PDF
-            os.remove(cropped_file_path)
-        else:
-            # replace the original PDF with the cropped one
-            os.replace(cropped_file_path, str(file_path))
-
-        os.remove(normalized_file_path)
-
-    except ImportError as exc:
-        msg = "pdfCropMargins not installed\nrun pip install pdfCropMargins"
-        raise ImportError(msg) from exc
-    except Exception as exc:
-        raise RuntimeError("Error cropping PDF margins") from exc
diff --git a/models/chgnet/analyze_chgnet.py b/models/chgnet/analyze_chgnet.py
@@ -8,7 +8,7 @@
 import pandas as pd
 from pymatgen.core import Structure
 from pymatviz import density_scatter, plot_structure_2d, ptable_heatmap_plotly
-from pymatviz.utils import save_fig
+from pymatviz.io import save_fig
 
 from matbench_discovery import PDF_FIGS
 from matbench_discovery import plots as plots

diff --git a/models/mace/analyze_mace.py b/models/mace/analyze_mace.py
@@ -6,7 +6,7 @@
 
 import pandas as pd
 from pymatviz import density_scatter, ptable_heatmap_plotly, spacegroup_sunburst
-from pymatviz.utils import save_fig
+from pymatviz.io import save_fig
 
 from matbench_discovery import plots as plots
 from matbench_discovery.data import df_wbm

diff --git a/models/wrenformer/analyze_wrenformer.py b/models/wrenformer/analyze_wrenformer.py
@@ -6,12 +6,13 @@
 import pandas as pd
 from aviary.wren.utils import get_isopointal_proto_from_aflow
 from pymatviz import spacegroup_hist, spacegroup_sunburst
+from pymatviz.io import df_to_pdf
 from pymatviz.ptable import ptable_heatmap_plotly
 from pymatviz.utils import add_identity_line, bin_df_cols, save_fig
 
 from matbench_discovery import PDF_FIGS, SITE_FIGS
 from matbench_discovery.data import DATA_FILES, df_wbm
-from matbench_discovery.plots import df_to_pdf, df_to_svelte_table
+from matbench_discovery.plots import df_to_svelte_table
 from matbench_discovery.preds import df_each_pred, df_preds, each_true_col
 
 __author__ = "Janosh Riebesell"

diff --git a/pyproject.toml b/pyproject.toml
@@ -34,14 +34,10 @@ requires-python = ">=3.9"
 dependencies = [
   "matplotlib",
   "numpy",
-  # tmp: shouldn't be needed, used to be included in output_formatting
-  "jinja2",
-  # output_formatting needed for pandas Stylers
-  # see https://github.com/pandas-dev/pandas/blob/-/pyproject.toml
-  "pandas[output_formatting]>=2.0.0",
+  "pandas>=2.0.0",
   "plotly",
   "pymatgen",
-  "pymatviz[export-figs]",
+  "pymatviz[export-figs,df-pdf-export]",
   "scikit-learn",
   "scipy",
   "tqdm",
@@ -69,8 +65,8 @@ running-models = [
   "megnet",
 ]
 3d-structures = ["crystaltoolkit"]
+df-to-pdf = ["jinja2"]
 fetch-data = ["gdown"]
-df-pdf-export = ["pdfCropMargins", "weasyprint"]
 
 [tool.setuptools.packages.find]
 include = ["matbench_discovery*"]

diff --git a/scripts/analyze_model_failure_cases.py b/scripts/analyze_model_failure_cases.py
@@ -13,7 +13,7 @@
 import plotly.graph_objs as go
 from pymatgen.core import Composition, Structure
 from pymatviz import count_elements, plot_structure_2d, ptable_heatmap_plotly
-from pymatviz.utils import save_fig
+from pymatviz.io import save_fig
 from tqdm import tqdm
 
 from matbench_discovery import PDF_FIGS, ROOT, SITE_FIGS

diff --git a/scripts/hist_classified_stable_vs_hull_dist.py b/scripts/hist_classified_stable_vs_hull_dist.py
@@ -9,7 +9,7 @@
 # %%
 from typing import Final
 
-from pymatviz.utils import save_fig
+from pymatviz.io import save_fig
 
 from matbench_discovery import PDF_FIGS
 from matbench_discovery.data import df_wbm

diff --git a/scripts/hist_classified_stable_vs_hull_dist_batches.py b/scripts/hist_classified_stable_vs_hull_dist_batches.py
@@ -10,7 +10,7 @@
 from typing import Final
 
 import pandas as pd
-from pymatviz.utils import save_fig
+from pymatviz.io import save_fig
 
 from matbench_discovery import PDF_FIGS
 from matbench_discovery.plots import hist_classified_stable_vs_hull_dist

diff --git a/scripts/model_figs/cumulative_metrics.py b/scripts/model_figs/cumulative_metrics.py
@@ -10,7 +10,7 @@
 
 # %%
 import pandas as pd
-from pymatviz.utils import save_fig
+from pymatviz.io import save_fig
 
 from matbench_discovery import PDF_FIGS, SITE_FIGS
 from matbench_discovery.plots import cumulative_metrics

diff --git a/scripts/model_figs/hist_classified_stable_vs_hull_dist_models.py b/scripts/model_figs/hist_classified_stable_vs_hull_dist_models.py
@@ -8,7 +8,7 @@
 import math
 from typing import Final
 
-from pymatviz.utils import save_fig
+from pymatviz.io import save_fig
 
 from matbench_discovery import PDF_FIGS, SITE_FIGS, today
 from matbench_discovery.plots import hist_classified_stable_vs_hull_dist, plt

diff --git a/scripts/model_figs/make_metrics_tables.py b/scripts/model_figs/make_metrics_tables.py
@@ -8,13 +8,14 @@
 
 import numpy as np
 import pandas as pd
+from pymatviz.io import df_to_pdf
 from sklearn.dummy import DummyClassifier
 
 from matbench_discovery import PDF_FIGS, SITE_FIGS
 from matbench_discovery.data import DATA_FILES, df_wbm
 from matbench_discovery.metrics import stable_metrics
 from matbench_discovery.models import MODEL_METADATA
-from matbench_discovery.plots import df_to_pdf, df_to_svelte_table
+from matbench_discovery.plots import df_to_svelte_table
 from matbench_discovery.preds import df_metrics, df_metrics_10k, each_true_col
 
 __author__ = "Janosh Riebesell"
@@ -25,7 +26,7 @@
     "M3GNet→MEGNet": "M3GNet",
     "CHGNet→MEGNet": "CHGNet",
 }
-train_size_col = "training size"
+train_size_col = "Training Size"
 df_metrics.loc[train_size_col] = df_metrics_10k.loc[train_size_col] = ""
 for model in df_metrics:
     model_name = name_map.get(model, model)
@@ -62,7 +63,7 @@
 df_metrics_10k["Dummy"] = dummy_metrics
 
 
-# %% for each model this ontology dict specifies (training type, test type, model class)
+# %% for each model this ontology dict specifies (training type, test type, model type)
 ontology = {
     "ALIGNN": ("RS2RE", "IS2RE", "GNN"),
     # "ALIGNN Pretrained": ("RS2RE", "IS2RE", "GNN"),
@@ -80,7 +81,7 @@
     "CHGNet→MEGNet": ("S2EFSM", "IS2RE-SR", "UIP-GNN"),
     "Dummy": ("", "", ""),
 }
-ontology_cols = ["Trained", "Deployed", "Model Class"]
+ontology_cols = ["Trained", "Deployed", model_type_col := "Model Type"]
 df_ont = pd.DataFrame(ontology, index=ontology_cols)
 # RS2RE = relaxed structure to relaxed energy
 # RP2RE = relaxed prototype to predicted energy
@@ -104,7 +105,7 @@
 make_uip_megnet_comparison = False
 show_cols = (
     f"F1,DAF,Precision,Accuracy,TPR,TNR,MAE,RMSE,{R2_col},"
-    "training size,Model Class".split(",")
+    f"{train_size_col},{model_type_col}".split(",")
 )
 
 for label, df in (("-first-10k", df_metrics_10k), ("", df_metrics)):
@@ -160,7 +161,7 @@
     )
     try:
         df_to_pdf(styler, f"{PDF_FIGS}/metrics-table{label}.pdf")
-    except ImportError as exc:
+    except (ImportError, RuntimeError) as exc:
         print(f"df_to_pdf failed: {exc}")
 
 

diff --git a/scripts/model_figs/model_run_times.py b/scripts/model_figs/model_run_times.py
@@ -15,7 +15,7 @@
 import requests
 import wandb
 import wandb.apis.public
-from pymatviz.utils import save_fig
+from pymatviz.io import save_fig
 from tqdm import tqdm
 
 from matbench_discovery import PDF_FIGS, SITE_FIGS, SITE_MODELS, WANDB_PATH

diff --git a/scripts/model_figs/roc_prc_curves_models.py b/scripts/model_figs/roc_prc_curves_models.py
@@ -8,7 +8,7 @@
 import math
 
 import pandas as pd
-from pymatviz.utils import save_fig
+from pymatviz.io import save_fig
 from sklearn.metrics import auc, precision_recall_curve, roc_curve
 from tqdm import tqdm
 

diff --git a/scripts/model_figs/rolling_mae_vs_hull_dist_models.py b/scripts/model_figs/rolling_mae_vs_hull_dist_models.py
@@ -6,7 +6,7 @@
 
 import numpy as np
 import plotly.graph_objects as go
-from pymatviz.utils import save_fig
+from pymatviz.io import save_fig
 
 from matbench_discovery import PDF_FIGS, SITE_FIGS
 from matbench_discovery.plots import rolling_mae_vs_hull_dist

diff --git a/scripts/rolling_mae_vs_hull_dist.py b/scripts/rolling_mae_vs_hull_dist.py
@@ -2,7 +2,7 @@
 
 
 # %%
-from pymatviz.utils import save_fig
+from pymatviz.io import save_fig
 
 from matbench_discovery import PDF_FIGS, today
 from matbench_discovery.plots import rolling_mae_vs_hull_dist

diff --git a/scripts/rolling_mae_vs_hull_dist_wbm_batches.py b/scripts/rolling_mae_vs_hull_dist_wbm_batches.py
@@ -4,7 +4,7 @@
 
 
 # %%
-from pymatviz.utils import save_fig
+from pymatviz.io import save_fig
 
 from matbench_discovery import PDF_FIGS, SITE_FIGS, today
 from matbench_discovery.plots import plt, rolling_mae_vs_hull_dist