From b86b476c1b85426e18f973a69f24271cce4098be Mon Sep 17 00:00:00 2001
From: Ardavan Oskooi <ardavano@google.com>
Date: Thu, 2 Feb 2023 12:16:42 -0800
Subject: [PATCH] Add type hints and more details on use of subpixel smoothing
 to tutorial on adjoint optimization (#2387)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add type hints and more details on use of subpixel smoothing in adjoint-optimization tutorial

* fix type hint for use_epsavg parameter of constraint function

* describe why projection operator will always produce values between 0 and 1 regardless of the value of its β parameter

* Update doc/docs/Python_Tutorials/Adjoint_Solver.md

---------

Co-authored-by: Steven G. Johnson <stevenj@mit.edu>
---
 doc/docs/Python_Tutorials/Adjoint_Solver.md   | 379 ++++++++++--------
 .../adjoint_optimization/mode_converter.py    | 122 ++++--
 2 files changed, 302 insertions(+), 199 deletions(-)

diff --git a/doc/docs/Python_Tutorials/Adjoint_Solver.md b/doc/docs/Python_Tutorials/Adjoint_Solver.md
index f28998deb..44fa60a40 100644
--- a/doc/docs/Python_Tutorials/Adjoint_Solver.md
+++ b/doc/docs/Python_Tutorials/Adjoint_Solver.md
@@ -85,15 +85,18 @@ in the adjoint-solver module is based on [A.M. Hammond et al., Optics
 Express, Vol. 29, pp. 23916-38
 (2021)](https://doi.org/10.1364/OE.431188).
 
-There are five important items to note in the set up of the
+There are six important items to highlight in the setup of this
 optimization problem:
 
 1. The lengthscale constraint is activated only in the final epoch. It
-   is often helpful to binarize the design before this final
+   is often helpful to binarize the design using a large $\beta$ value
+   for the projection operator (hyperbolic tangent) before this final
    epoch. This is because the lengthscale constraint forces
    binarization which could induce large changes in an initial
-   greyscale design and thus irrevocably spoil the performance of the
-   final design.
+   grayscale design and thus irrevocably spoil the performance of the
+   final design. Note that regardless of the value of $\beta$,
+   projecting the design weights $u$ will produce grayscale values
+   between 0 and 1 whenever $u \approx \eta \pm \frac{1}{\beta}$.
 
 2. The initial value of the epigraph variable of the final epoch (in
   which the minimum feature size constraint is imposed) should take
@@ -105,27 +108,35 @@ optimization problem:
   (CCSA) algorithm).
 
 3. The edge of the design region is padded by a filter radius (rather
-  than e.g., a single pixel) to produce measured lengthscales of the
-  final design that are consistent with the imposed constraint.
+  than e.g., a single pixel) to produce measured minimum feature sizes
+  of the final design that are consistent with the imposed constraint.
 
-4. The hyperparameters of the lengthscale constraint function (`a1`,
-  `b1`, and `c0` in the `glc` function of the script below), need to
+4. The hyperparameters of the feature-size constraint function
+  (`a1`, `b1`, and `c0` in the `glc` function of the script below), need to
   be chosen carefully to produce final designs which do not
   significantly degrade the performance of the unconstrained designs
   at the start of the final epoch.
 
 5. Damping of the design weights is used for the early epochs in which
-  the design is mostly greyscale to induce binarization. Subpixel
-  averaging of the design weights is used in the later epochs in which
-  the $\beta$ parameter of the thresholding function is large and thus
-  the design is mostly binarized. Note that the accuracy of the
-  adjoint gradients will break down for a binary design without
-  subpixel smoothing.
+  the $\beta$ parameter of the projection function is small (< ~50) and
+  the design is mostly grayscale in order to induce binarization.
+
+6. The subpixel-smoothing feature of the [`MaterialGrid`](../Python_User_Interface.md#materialgrid)
+  is necessary whenever the $\beta$ parameter of the projection function
+  is large (> ~50) and thus the design is binary (or nearly so). Without
+  subpixel smoothing, the gradients are nearly zero
+  except for $u \approx \eta \pm 1/\beta$, where the first and second
+  derivatives blow up, causing optimization algorithms to break down. When subpixel
+  smoothing is enabled (`do_averaging=True`), the weights are projected
+  *internally* using the `beta` parameter. For this reason, any
+  preprocessing (i.e., mapping) of the weights outside of the `MaterialGrid`
+  should apply only a filter to the weights but must not perform any
+  projection.
 
 A schematic of the final design and the simulation layout is shown
-below. The minimum lengthscale of the final design, measured using a
+below. The minimum feature size of the final design, measured using a
 [ruler](https://github.com/NanoComp/photonics-opt-testbed/tree/main/ruler),
-is 165 nm. This value is consistent with the imposed lengthscale since
+is 165 nm. This value is consistent with the imposed constraint since
 it is approximately within one design pixel (10 nm).
 
 ![](../images/mode_converter_sim_layout.png#center)
@@ -195,37 +206,36 @@ approximately 14 hours.
 ```py
 import numpy as np
 import matplotlib
-matplotlib.use('agg')
+matplotlib.use("agg")
 import matplotlib.pyplot as plt
 from autograd import numpy as npa, tensor_jacobian_product, grad
 import nlopt
 import meep as mp
 import meep.adjoint as mpa
+from typing import NamedTuple
 
 resolution = 50  # pixels/μm
 
-w = 0.4          # waveguide width
-l = 3.0          # waveguide length (on each side of design region)
-dpad = 0.6       # padding length above/below design region
-dpml = 1.0       # PML thickness
-dx = 1.6         # length of design region
-dy = 1.6         # width of design region
+w = 0.4  # waveguide width
+l = 3.0  # waveguide length (on each side of design region)
+dpad = 0.6  # padding length above/below design region
+dpml = 1.0  # PML thickness
+dx = 1.6  # length of design region
+dy = 1.6  # width of design region
 
-sx = dpml+l+dx+l+dpml
-sy = dpml+dpad+dy+dpad+dpml
-cell_size = mp.Vector3(sx,sy,0)
+sx = dpml + l + dx + l + dpml
+sy = dpml + dpad + dy + dpad + dpml
+cell_size = mp.Vector3(sx, sy, 0)
 
 pml_layers = [mp.PML(thickness=dpml)]
 
 # wavelengths for minimax optimization
 wvls = (1.265, 1.270, 1.275, 1.285, 1.290, 1.295)
-frqs = [1/wvl for wvl in wvls]
+frqs = [1 / wvl for wvl in wvls]
 
 minimum_length = 0.15  # minimum length scale (μm)
-eta_i = (
-    0.5 # blueprint design field thresholding point (between 0 and 1)
-)
-eta_e = 0.75       # erosion design field thresholding point (between 0 and 1)
+eta_i = 0.5  # blueprint design field thresholding point (between 0 and 1)
+eta_e = 0.75  # erosion design field thresholding point (between 0 and 1)
 eta_d = 1 - eta_e  # dilation design field thresholding point (between 0 and 1)
 filter_radius = mpa.get_conic_radius_from_eta_e(minimum_length, eta_e)
 print(f"filter_radius:, {filter_radius:.6f}")
@@ -233,27 +243,27 @@ print(f"filter_radius:, {filter_radius:.6f}")
 # pulsed source center frequency and bandwidth
 wvl_min = 1.26
 wvl_max = 1.30
-frq_min = 1/wvl_max
-frq_max = 1/wvl_min
-fcen = 0.5*(frq_min+frq_max)
-df = frq_max-frq_min
+frq_min = 1 / wvl_max
+frq_max = 1 / wvl_min
+fcen = 0.5 * (frq_min + frq_max)
+df = frq_max - frq_min
 
 eig_parity = mp.ODD_Z
-src_pt = mp.Vector3(-0.5*sx+dpml,0,0)
+src_pt = mp.Vector3(-0.5 * sx + dpml, 0, 0)
 
 nSiO2 = 1.5
 SiO2 = mp.Medium(index=nSiO2)
 nSi = 3.5
 Si = mp.Medium(index=nSi)
 
-design_region_size = mp.Vector3(dx,dy,0)
-design_region_resolution = int(2*resolution)
-Nx = int(design_region_size.x*design_region_resolution)
-Ny = int(design_region_size.y*design_region_resolution)
+design_region_size = mp.Vector3(dx, dy, 0)
+design_region_resolution = int(2 * resolution)
+Nx = int(design_region_size.x * design_region_resolution)
+Ny = int(design_region_size.y * design_region_resolution)
 
 # impose a bit "mask" of thickness equal to the filter radius
 # around the edges of the design region in order to prevent
-# violations of the minimum linewidth constraint.
+# violations of the minimum feature size constraint.
 
 x_g = np.linspace(
     -design_region_size.x / 2,
@@ -272,40 +282,43 @@ X_g, Y_g = np.meshgrid(
     indexing="ij",
 )
 
-left_wg_mask = (
-    (X_g <= -design_region_size.x / 2 + filter_radius) &
-    (np.abs(Y_g) <= w / 2)
+left_wg_mask = (X_g <= -design_region_size.x / 2 + filter_radius) & (
+    np.abs(Y_g) <= w / 2
 )
-right_wg_mask = (
-    (X_g >= design_region_size.x / 2 - filter_radius) &
-    (np.abs(Y_g) <= w / 2)
+right_wg_mask = (X_g >= design_region_size.x / 2 - filter_radius) & (
+    np.abs(Y_g) <= w / 2
 )
 Si_mask = left_wg_mask | right_wg_mask
 
 border_mask = (
-    (X_g <= -design_region_size.x / 2 + filter_radius) |
-    (X_g >= design_region_size.x / 2 - filter_radius) |
-    (Y_g <= -design_region_size.y / 2 + filter_radius) |
-    (Y_g >= design_region_size.y / 2 - filter_radius)
+    (X_g <= -design_region_size.x / 2 + filter_radius)
+    | (X_g >= design_region_size.x / 2 - filter_radius)
+    | (Y_g <= -design_region_size.y / 2 + filter_radius)
+    | (Y_g >= design_region_size.y / 2 - filter_radius)
 )
 SiO2_mask = border_mask.copy()
 SiO2_mask[Si_mask] = False
 
-refl_pt = mp.Vector3(-0.5*sx+dpml+0.5*l)
-tran_pt = mp.Vector3(0.5*sx-dpml-0.5*l)
+refl_pt = mp.Vector3(-0.5 * sx + dpml + 0.5 * l)
+tran_pt = mp.Vector3(0.5 * sx - dpml - 0.5 * l)
 
 stop_cond = mp.stop_when_fields_decayed(50, mp.Ez, refl_pt, 1e-8)
 
-def mapping(x, eta, beta):
+
+def mapping(x: np.ndarray, eta: float, beta: float) -> np.ndarray:
     """A differentiable mapping function which applies, in order,
        the following sequence of transformations to the design weights:
        (1) a bit mask for the boundary pixels, (2) convolution with a
-       conic filter, and (3) projection via a hyperbolic tangent.
+       conic filter, and (3) projection via a hyperbolic tangent (if
+       necessary).
 
     Args:
       x: design weights as a 1d array of size Nx*Ny.
       eta: erosion/dilation parameter for the projection.
-      beta: bias parameter for the projection.
+      beta: bias parameter for the projection. A value of 0 disables the projection.
+
+    Returns:
+      The mapped design weights as a 1d array.
     """
     x = npa.where(
         Si_mask.flatten(),
@@ -314,7 +327,7 @@ def mapping(x, eta, beta):
             SiO2_mask.flatten(),
             0,
             x,
-        )
+        ),
     )
 
     filtered_field = mpa.conic_filter(
@@ -325,51 +338,61 @@ def mapping(x, eta, beta):
         design_region_resolution,
     )
 
-    projected_field = mpa.tanh_projection(
-        filtered_field,
-        beta,
-        eta,
-    )
+    if beta == 0:
+        return filtered_field.flatten()
 
-    return projected_field.flatten()
+    else:
+        projected_field = mpa.tanh_projection(
+            filtered_field,
+            beta,
+            eta,
+        )
 
+        return projected_field.flatten()
 
-def f(x, grad):
+
+def f(x: np.ndarray, grad: np.ndarray) -> float:
     """Objective function for the epigraph formulation.
 
     Args:
       x: 1d array of size 1+Nx*Ny containing epigraph variable (first element)
          and design weights (remaining Nx*Ny elements).
       grad: the gradient as a 1d array of size 1+Nx*Ny modified in place.
+
+    Returns:
+      The epigraph variable (a scalar).
     """
     t = x[0]  # epigraph variable
-    v = x[1:] # design weights
+    v = x[1:]  # design weights
     if grad.size > 0:
         grad[0] = 1
         grad[1:] = 0
     return t
 
 
-def c(result, x, gradient, eta, beta):
+def c(result: np.ndarray, x: np.ndarray, gradient: np.ndarray, eta: float,
+      beta: float, use_epsavg: bool):
     """Constraint function for the epigraph formulation.
 
-       Args:
-         result: the result of the function evaluation modified in place.
-         x: 1d array of size 1+Nx*Ny containing epigraph variable (first
-            element) and design weights (remaining Nx*Ny elements).
-         gradient: the Jacobian matrix with dimensions (1+Nx*Ny,
-                   2*num. wavelengths) modified in place.
-         eta: erosion/dilation parameter for projection.
-         beta: bias parameter for projection.
+    Args:
+      result: the result of the function evaluation modified in place.
+      x: 1d array of size 1+Nx*Ny containing epigraph variable (first
+         element) and design weights (remaining Nx*Ny elements).
+      gradient: the Jacobian matrix with dimensions (1+Nx*Ny,
+                2*num. wavelengths) modified in place.
+      eta: erosion/dilation parameter for projection.
+      beta: bias parameter for projection.
+      use_epsavg: whether to use subpixel smoothing.
     """
     t = x[0]  # epigraph variable
-    v = x[1:] # design weights
+    v = x[1:]  # design weights
 
-    f0, dJ_du = opt([mapping(v, eta, beta)])
+    f0, dJ_du = opt([mapping(v, eta, 0 if use_epsavg else beta)])
 
     f0_reflection = f0[0]
     f0_transmission = f0[1]
     f0_merged = np.concatenate((f0_reflection, f0_transmission))
+    f0_merged_str = '[' + ','.join(str(ff) for ff in f0_merged) + ']'
 
     dJ_du_reflection = dJ_du[0]
     dJ_du_transmission = dJ_du[1]
@@ -379,7 +402,7 @@ def c(result, x, gradient, eta, beta):
     my_grad[:, nfrq:] = dJ_du_transmission
 
     # backpropagate the gradients through mapping function
-    for k in range(2*nfrq):
+    for k in range(2 * nfrq):
         my_grad[:, k] = tensor_jacobian_product(mapping, 0)(
             v,
             eta,
@@ -396,52 +419,58 @@ def c(result, x, gradient, eta, beta):
     objfunc_history.append(np.real(f0_merged))
     epivar_history.append(t)
 
-    print(f"iteration:, {cur_iter[0]:3d}, eta: {eta}, beta: {beta:2d}, "
-          f"t: {t:.5f}, obj. func.: {f0_merged}")
+    print(
+        f"iteration:, {cur_iter[0]:3d}, eta: {eta}, beta: {beta:2d}, "
+        f"t: {t:.5f}, obj. func.: {f0_merged_str}"
+    )
 
     cur_iter[0] = cur_iter[0] + 1
 
 
-def glc(result, x, gradient, beta):
+def glc(result: np.ndarray, x: np.ndarray, gradient: np.ndarray,
+        beta: float) -> float:
     """Constraint function for the minimum linewidth.
 
-       Args:
-         result: the result of the function evaluation modified in place.
-         x: 1d array of size 1+Nx*Ny containing epigraph variable (first
-            element) and design weights (remaining elements).
-         gradient: the Jacobian matrix with dimensions (1+Nx*Ny,
-                   num. wavelengths) modified in place.
-         beta: bias parameter for projection.
+    Args:
+      result: the result of the function evaluation modified in place.
+      x: 1d array of size 1+Nx*Ny containing epigraph variable (first
+         element) and design weights (remaining elements).
+      gradient: the Jacobian matrix with dimensions (1+Nx*Ny,
+                num. wavelengths) modified in place.
+      beta: bias parameter for projection.
+
+    Returns:
+      The value of the constraint function (a scalar).
     """
     t = x[0]  # dummy parameter
-    v = x[1:] # design parameters
-    a1 = 1e-3 # hyper parameter (primary)
-    b1 = 0    # hyper parameter (secondary)
-    gradient[:,0] = -a1
+    v = x[1:]  # design parameters
+    a1 = 1e-3  # hyper parameter (primary)
+    b1 = 0  # hyper parameter (secondary)
+    gradient[:, 0] = -a1
 
     filter_f = lambda a: mpa.conic_filter(
-        a.reshape(Nx,Ny),
+        a.reshape(Nx, Ny),
         filter_radius,
         design_region_size.x,
         design_region_size.y,
         design_region_resolution,
     )
-    threshold_f = lambda a: mpa.tanh_projection(a,beta,eta_i)
+    threshold_f = lambda a: mpa.tanh_projection(a, beta, eta_i)
 
     # hyper parameter (constant factor and exponent)
-    c0 = 1e7*(filter_radius*1/resolution)**4
+    c0 = 1e7 * (filter_radius * 1 / resolution) ** 4
 
-    M1 = lambda a: mpa.constraint_solid(a,c0,eta_e,filter_f,threshold_f,1)
-    M2 = lambda a: mpa.constraint_void(a,c0,eta_d,filter_f,threshold_f,1)
+    M1 = lambda a: mpa.constraint_solid(a, c0, eta_e, filter_f, threshold_f, 1)
+    M2 = lambda a: mpa.constraint_void(a, c0, eta_d, filter_f, threshold_f, 1)
 
     g1 = grad(M1)(v)
     g2 = grad(M2)(v)
 
-    result[0] = M1(v) - a1*t - b1
-    result[1] = M2(v) - a1*t - b1
+    result[0] = M1(v) - a1 * t - b1
+    result[1] = M2(v) - a1 * t - b1
 
-    gradient[0,1:] = g1.flatten()
-    gradient[1,1:] = g2.flatten()
+    gradient[0, 1:] = g1.flatten()
+    gradient[1, 1:] = g2.flatten()
 
     t1 = (M1(v) - b1) / a1
     t2 = (M2(v) - b1) / a1
@@ -451,19 +480,19 @@ def glc(result, x, gradient, beta):
     return max(t1, t2)
 
 
-def straight_waveguide():
+def straight_waveguide() -> (np.ndarray, NamedTuple):
     """Computes the DFT fields from the mode source in a straight waveguide
        for use as normalization of the reflectance measurement during the
        optimization.
 
     Returns:
-      1d array of DFT fields and DFT fields object returned by
-      `meep.get_flux_data`.
+      A 2-tuple consisting of a 1d array of the input flux (squared magnitude
+      of the mode coefficients) and the object returned by `meep.get_flux_data`.
     """
     sources = [
         mp.EigenModeSource(
-            src=mp.GaussianSource(fcen,fwidth=df),
-            size=mp.Vector3(0,sy,0),
+            src=mp.GaussianSource(fcen, fwidth=df),
+            size=mp.Vector3(0, sy, 0),
             center=src_pt,
             eig_band=1,
             eig_parity=eig_parity,
@@ -472,7 +501,7 @@ def straight_waveguide():
 
     geometry = [
         mp.Block(
-            size=mp.Vector3(mp.inf,w,mp.inf),
+            size=mp.Vector3(mp.inf, w, mp.inf),
             center=mp.Vector3(),
             material=Si,
         )
@@ -490,8 +519,7 @@ def straight_waveguide():
 
     refl_mon = sim.add_mode_monitor(
         frqs,
-        mp.ModeRegion(center=refl_pt,
-                      size=mp.Vector3(0,sy,0)),
+        mp.ModeRegion(center=refl_pt, size=mp.Vector3(0, sy, 0)),
         yee_grid=True,
     )
 
@@ -504,14 +532,18 @@ def straight_waveguide():
     )
 
     coeffs = res.alpha
-    input_flux = np.abs(coeffs[0,:,0])**2
+    input_flux = np.abs(coeffs[0, :, 0]) ** 2
     input_flux_data = sim.get_flux_data(refl_mon)
 
     return input_flux, input_flux_data
 
 
-def mode_converter_optimization(input_flux, input_flux_data, use_damping,
-                                use_epsavg):
+def mode_converter_optimization(
+        input_flux: np.ndarray,
+        input_flux_data: NamedTuple,
+        use_damping: bool,
+        use_epsavg: bool,
+        beta: float) -> mpa.OptimizationProblem:
     """Sets up the adjoint optimization of the waveguide mode converter.
 
     Args:
@@ -524,24 +556,21 @@ def mode_converter_optimization(input_flux, input_flux_data, use_damping,
       A `meep.adjoint.OptimizationProblem` class object.
     """
     matgrid = mp.MaterialGrid(
-        mp.Vector3(Nx,Ny,0),
+        mp.Vector3(Nx, Ny, 0),
         SiO2,
         Si,
-        weights=np.ones((Nx,Ny)),
+        weights=np.ones((Nx, Ny)),
+        beta=beta if use_epsavg else 0,
         do_averaging=True if use_epsavg else False,
-        damping=0.02*2*np.pi*fcen if use_damping else 0,
+        damping=0.02 * 2 * np.pi * fcen if use_damping else 0,
     )
 
     matgrid_region = mpa.DesignRegion(
         matgrid,
         volume=mp.Volume(
             center=mp.Vector3(),
-            size=mp.Vector3(
-                design_region_size.x,
-                design_region_size.y,
-                mp.inf
-            ),
-        )
+            size=mp.Vector3(design_region_size.x, design_region_size.y, mp.inf),
+        ),
     )
 
     matgrid_geometry = [
@@ -554,9 +583,9 @@ def mode_converter_optimization(input_flux, input_flux_data, use_damping,
 
     geometry = [
         mp.Block(
-            size=mp.Vector3(mp.inf,w,mp.inf),
             center=mp.Vector3(),
-            material=Si
+            size=mp.Vector3(mp.inf, w, mp.inf),
+            material=Si,
         )
     ]
 
@@ -564,11 +593,12 @@ def mode_converter_optimization(input_flux, input_flux_data, use_damping,
 
     sources = [
         mp.EigenModeSource(
-            src=mp.GaussianSource(fcen,fwidth=df),
-            size=mp.Vector3(0,sy,0),
+            src=mp.GaussianSource(fcen, fwidth=df),
+            size=mp.Vector3(0, sy, 0),
             center=src_pt,
             eig_band=1,
-            eig_parity=eig_parity),
+            eig_parity=eig_parity,
+        ),
     ]
 
     sim = mp.Simulation(
@@ -586,7 +616,7 @@ def mode_converter_optimization(input_flux, input_flux_data, use_damping,
             sim,
             mp.Volume(
                 center=refl_pt,
-                size=mp.Vector3(0,sy,0),
+                size=mp.Vector3(0, sy, 0),
             ),
             1,
             forward=False,
@@ -597,17 +627,17 @@ def mode_converter_optimization(input_flux, input_flux_data, use_damping,
             sim,
             mp.Volume(
                 center=tran_pt,
-                size=mp.Vector3(0,sy,0),
+                size=mp.Vector3(0, sy, 0),
             ),
             2,
             eig_parity=eig_parity,
-        )
+        ),
     ]
 
-    def J1(refl_mon,tran_mon):
+    def J1(refl_mon, tran_mon):
         return npa.power(npa.abs(refl_mon), 2) / input_flux
 
-    def J2(refl_mon,tran_mon):
+    def J2(refl_mon, tran_mon):
         return 1 - npa.power(npa.abs(tran_mon), 2) / input_flux
 
     opt = mpa.OptimizationProblem(
@@ -621,7 +651,7 @@ def mode_converter_optimization(input_flux, input_flux_data, use_damping,
     return opt
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     input_flux, input_flux_data = straight_waveguide()
 
     algorithm = nlopt.LD_MMA
@@ -631,17 +661,19 @@ if __name__ == '__main__':
 
     # initial design parameters
     x = np.ones((n,)) * 0.5
-    x[Si_mask.flatten()] = 1.    # set the edges of waveguides to silicon
-    x[SiO2_mask.flatten()] = 0.  # set the other edges to SiO2
+    x[Si_mask.flatten()] = 1.0  # set the edges of waveguides to silicon
+    x[SiO2_mask.flatten()] = 0.0  # set the other edges to SiO2
 
     # lower and upper bounds for design weights
     lb = np.zeros((n,))
-    lb[Si_mask.flatten()] = 1.
+    lb[Si_mask.flatten()] = 1.0
     ub = np.ones((n,))
-    ub[SiO2_mask.flatten()] = 0.
+    ub[SiO2_mask.flatten()] = 0.0
 
-    # insert epigraph variable initial value and bounds in the design array
-    x = np.insert(x, 0, 1.2)   # ignored
+    # insert epigraph variable initial value (arbitrary) and bounds into the
+    # design array. the actual value is determined by the objective and
+    # constraint functions below.
+    x = np.insert(x, 0, 1.2)
     lb = np.insert(lb, 0, -np.inf)
     ub = np.insert(ub, 0, +np.inf)
 
@@ -649,11 +681,11 @@ if __name__ == '__main__':
     epivar_history = []
     cur_iter = [0]
 
-    beta_thresh = 64
+    beta_thresh = 64  # threshold beta at or above which to use subpixel smoothing
     betas = [8, 16, 32, 64, 128, 256]
     max_evals = [80, 80, 100, 120, 120, 100]
-    tol_epi = np.array([1e-4] * 2 * len(frqs)) # R, 1-T
-    tol_lw = np.array([1e-8] * 2) # line width, line spacing
+    tol_epi = np.array([1e-4] * 2 * len(frqs))  # R, 1-T
+    tol_lw = np.array([1e-8] * 2)  # line width, line spacing
 
     for beta, max_eval in zip(betas, max_evals):
         solver = nlopt.opt(algorithm, n + 1)
@@ -661,9 +693,16 @@ if __name__ == '__main__':
         solver.set_upper_bounds(ub)
         solver.set_min_objective(f)
         solver.set_maxeval(max_eval)
-        solver.set_param("dual_ftol_rel",1e-7)
+        solver.set_param("dual_ftol_rel", 1e-7)
         solver.add_inequality_mconstraint(
-            lambda rr, xx, gg: c(rr, xx, gg, eta_i, beta),
+            lambda rr, xx, gg: c(
+                rr,
+                xx,
+                gg,
+                eta_i,
+                beta,
+                False if beta < beta_thresh else True,
+            ),
             tol_epi,
         )
         solver.set_param("verbosity", 1)
@@ -671,8 +710,9 @@ if __name__ == '__main__':
         opt = mode_converter_optimization(
             input_flux,
             input_flux_data,
-            True, # use_damping
-            False if beta <= beta_thresh else True, # use_epsavg
+            True,  # use_damping
+            False if beta < beta_thresh else True,  # use_epsavg
+            beta,
         )
 
         # apply the minimum linewidth constraint
@@ -680,10 +720,15 @@ if __name__ == '__main__':
         # binary design from the previous epoch.
         if beta == betas[-1]:
             res = np.zeros(2)
-            grd = np.zeros((2,n+1))
+            grd = np.zeros((2, n + 1))
             t = glc(res, x, grd, beta)
             solver.add_inequality_mconstraint(
-                lambda rr, xx, gg: glc(rr, xx, gg, beta),
+                lambda rr, xx, gg: glc(
+                    rr,
+                    xx,
+                    gg,
+                    beta,
+                ),
                 tol_lw,
             )
 
@@ -693,16 +738,20 @@ if __name__ == '__main__':
         # function over the six wavelengths and the lengthscale
         # constraint (final epoch only).
         t0 = opt(
-            [mapping(x[1:], eta_i, beta)],
+            [
+                mapping(
+                    x[1:],
+                    eta_i,
+                    beta if beta < beta_thresh else 0,
+                ),
+            ],
             need_gradient=False,
         )
-        t0 = np.concatenate((t0[0][0],t0[0][1]))
+        t0 = np.concatenate((t0[0][0], t0[0][1]))
+        t0_str = '[' + ','.join(str(tt) for tt in t0) + ']'
         x[0] = np.amax(t0)
-        if beta == betas[-1]:
-            x[0] = 1.05 * max(x[0], t)
-        else:
-            x[0] = 1.05 * x[0]
-        print(f"data:, {beta}, {t0}, {x[0]}")
+        x[0] = 1.05 * (max(x[0], t) if beta == betas[-1] else x[0])
+        print(f"data:, {beta}, {t0_str}, {x[0]}")
 
         x[:] = solver.optimize(x)
 
@@ -710,39 +759,39 @@ if __name__ == '__main__':
             x[1:],
             eta_i,
             beta,
-        ).reshape(Nx,Ny)
+        ).reshape(Nx, Ny)
 
         # save the unmapped weights and a bitmap image
         # of the design weights at the end of each epoch.
         fig, ax = plt.subplots()
         ax.imshow(
             optimal_design_weights,
-            cmap='binary',
-            interpolation='none',
+            cmap="binary",
+            interpolation="none",
         )
         ax.set_axis_off()
         if mp.am_master():
             fig.savefig(
-                f'optimal_design_beta{beta}.png',
+                f"optimal_design_beta{beta}.png",
                 dpi=150,
-                bbox_inches='tight',
+                bbox_inches="tight",
             )
-            # save the final design (unmapped) as a 2d array in CSV format
+            # save the final (unmapped) design as a 2d array in CSV format
             np.savetxt(
-                f'design_weights_beta{beta}.csv',
-                x[1:].reshape(Nx,Ny),
-                fmt='%4.2f',
-                delimiter=','
+                f"unmapped_design_weights_beta{beta}.csv",
+                x[1:].reshape(Nx, Ny),
+                fmt="%4.2f",
+                delimiter=",",
             )
 
     # save all the important optimization parameters and output
     # as separate arrays in a single file for post processing.
-    with open("optimal_design.npz","wb") as fl:
+    with open("optimal_design.npz", "wb") as fl:
         np.savez(
             fl,
             Nx=Nx,
             Ny=Ny,
-            design_region_size=(dx,dy),
+            design_region_size=(dx, dy),
             design_region_resolution=design_region_resolution,
             betas=betas,
             max_eval=max_eval,
diff --git a/python/examples/adjoint_optimization/mode_converter.py b/python/examples/adjoint_optimization/mode_converter.py
index b672f7ee3..29fba4733 100644
--- a/python/examples/adjoint_optimization/mode_converter.py
+++ b/python/examples/adjoint_optimization/mode_converter.py
@@ -20,6 +20,7 @@
 import nlopt
 import meep as mp
 import meep.adjoint as mpa
+from typing import NamedTuple
 
 resolution = 50  # pixels/μm
 
@@ -70,7 +71,7 @@
 
 # impose a bit "mask" of thickness equal to the filter radius
 # around the edges of the design region in order to prevent
-# violations of the minimum linewidth constraint.
+# violations of the minimum feature size constraint.
 
 x_g = np.linspace(
     -design_region_size.x / 2,
@@ -112,16 +113,20 @@
 stop_cond = mp.stop_when_fields_decayed(50, mp.Ez, refl_pt, 1e-8)
 
 
-def mapping(x, eta, beta):
+def mapping(x: np.ndarray, eta: float, beta: float) -> np.ndarray:
     """A differentiable mapping function which applies, in order,
        the following sequence of transformations to the design weights:
        (1) a bit mask for the boundary pixels, (2) convolution with a
-       conic filter, and (3) projection via a hyperbolic tangent.
+       conic filter, and (3) projection via a hyperbolic tangent (if
+       necessary).
 
     Args:
       x: design weights as a 1d array of size Nx*Ny.
       eta: erosion/dilation parameter for the projection.
-      beta: bias parameter for the projection.
+      beta: bias parameter for the projection. A value of 0 disables the projection.
+
+    Returns:
+      The mapped design weights as a 1d array.
     """
     x = npa.where(
         Si_mask.flatten(),
@@ -141,22 +146,29 @@ def mapping(x, eta, beta):
         design_region_resolution,
     )
 
-    projected_field = mpa.tanh_projection(
-        filtered_field,
-        beta,
-        eta,
-    )
+    if beta == 0:
+        return filtered_field.flatten()
+
+    else:
+        projected_field = mpa.tanh_projection(
+            filtered_field,
+            beta,
+            eta,
+        )
 
-    return projected_field.flatten()
+        return projected_field.flatten()
 
 
-def f(x, grad):
+def f(x: np.ndarray, grad: np.ndarray) -> float:
     """Objective function for the epigraph formulation.
 
     Args:
       x: 1d array of size 1+Nx*Ny containing epigraph variable (first element)
          and design weights (remaining Nx*Ny elements).
       grad: the gradient as a 1d array of size 1+Nx*Ny modified in place.
+
+    Returns:
+      The epigraph variable (a scalar).
     """
     t = x[0]  # epigraph variable
     v = x[1:]  # design weights
@@ -166,7 +178,14 @@ def f(x, grad):
     return t
 
 
-def c(result, x, gradient, eta, beta):
+def c(
+    result: np.ndarray,
+    x: np.ndarray,
+    gradient: np.ndarray,
+    eta: float,
+    beta: float,
+    use_epsavg: bool,
+):
     """Constraint function for the epigraph formulation.
 
     Args:
@@ -177,15 +196,17 @@ def c(result, x, gradient, eta, beta):
                 2*num. wavelengths) modified in place.
       eta: erosion/dilation parameter for projection.
       beta: bias parameter for projection.
+      use_epsavg: whether to use subpixel smoothing.
     """
     t = x[0]  # epigraph variable
     v = x[1:]  # design weights
 
-    f0, dJ_du = opt([mapping(v, eta, beta)])
+    f0, dJ_du = opt([mapping(v, eta, 0 if use_epsavg else beta)])
 
     f0_reflection = f0[0]
     f0_transmission = f0[1]
     f0_merged = np.concatenate((f0_reflection, f0_transmission))
+    f0_merged_str = "[" + ",".join(str(ff) for ff in f0_merged) + "]"
 
     dJ_du_reflection = dJ_du[0]
     dJ_du_transmission = dJ_du[1]
@@ -214,13 +235,13 @@ def c(result, x, gradient, eta, beta):
 
     print(
         f"iteration:, {cur_iter[0]:3d}, eta: {eta}, beta: {beta:2d}, "
-        f"t: {t:.5f}, obj. func.: {f0_merged}"
+        f"t: {t:.5f}, obj. func.: {f0_merged_str}"
     )
 
     cur_iter[0] = cur_iter[0] + 1
 
 
-def glc(result, x, gradient, beta):
+def glc(result: np.ndarray, x: np.ndarray, gradient: np.ndarray, beta: float) -> float:
     """Constraint function for the minimum linewidth.
 
     Args:
@@ -230,6 +251,9 @@ def glc(result, x, gradient, beta):
       gradient: the Jacobian matrix with dimensions (1+Nx*Ny,
                 num. wavelengths) modified in place.
       beta: bias parameter for projection.
+
+    Returns:
+      The value of the constraint function (a scalar).
     """
     t = x[0]  # dummy parameter
     v = x[1:]  # design parameters
@@ -269,14 +293,14 @@ def glc(result, x, gradient, beta):
     return max(t1, t2)
 
 
-def straight_waveguide():
+def straight_waveguide() -> (np.ndarray, NamedTuple):
     """Computes the DFT fields from the mode source in a straight waveguide
        for use as normalization of the reflectance measurement during the
        optimization.
 
     Returns:
-      1d array of DFT fields and DFT fields object returned by
-      `meep.get_flux_data`.
+      A 2-tuple consisting of a 1d array of DFT fields and the DFT fields
+      object returned by `meep.get_flux_data`.
     """
     sources = [
         mp.EigenModeSource(
@@ -327,7 +351,13 @@ def straight_waveguide():
     return input_flux, input_flux_data
 
 
-def mode_converter_optimization(input_flux, input_flux_data, use_damping, use_epsavg):
+def mode_converter_optimization(
+    input_flux: np.ndarray,
+    input_flux_data: NamedTuple,
+    use_damping: bool,
+    use_epsavg: bool,
+    beta: float,
+) -> mpa.OptimizationProblem:
     """Sets up the adjoint optimization of the waveguide mode converter.
 
     Args:
@@ -344,6 +374,7 @@ def mode_converter_optimization(input_flux, input_flux_data, use_damping, use_ep
         SiO2,
         Si,
         weights=np.ones((Nx, Ny)),
+        beta=beta if use_epsavg else 0,
         do_averaging=True if use_epsavg else False,
         damping=0.02 * 2 * np.pi * fcen if use_damping else 0,
     )
@@ -365,7 +396,11 @@ def mode_converter_optimization(input_flux, input_flux_data, use_damping, use_ep
     ]
 
     geometry = [
-        mp.Block(size=mp.Vector3(mp.inf, w, mp.inf), center=mp.Vector3(), material=Si)
+        mp.Block(
+            center=mp.Vector3(),
+            size=mp.Vector3(mp.inf, w, mp.inf),
+            material=Si,
+        )
     ]
 
     geometry += matgrid_geometry
@@ -449,8 +484,10 @@ def J2(refl_mon, tran_mon):
     ub = np.ones((n,))
     ub[SiO2_mask.flatten()] = 0.0
 
-    # insert epigraph variable initial value and bounds in the design array
-    x = np.insert(x, 0, 1.2)  # ignored
+    # insert epigraph variable initial value (arbitrary) and bounds into the
+    # design array. the actual value is determined by the objective and
+    # constraint functions below.
+    x = np.insert(x, 0, 1.2)
     lb = np.insert(lb, 0, -np.inf)
     ub = np.insert(ub, 0, +np.inf)
 
@@ -458,7 +495,7 @@ def J2(refl_mon, tran_mon):
     epivar_history = []
     cur_iter = [0]
 
-    beta_thresh = 64
+    beta_thresh = 64  # threshold beta at or above which to use subpixel smoothing
     betas = [8, 16, 32, 64, 128, 256]
     max_evals = [80, 80, 100, 120, 120, 100]
     tol_epi = np.array([1e-4] * 2 * len(frqs))  # R, 1-T
@@ -472,7 +509,14 @@ def J2(refl_mon, tran_mon):
         solver.set_maxeval(max_eval)
         solver.set_param("dual_ftol_rel", 1e-7)
         solver.add_inequality_mconstraint(
-            lambda rr, xx, gg: c(rr, xx, gg, eta_i, beta),
+            lambda rr, xx, gg: c(
+                rr,
+                xx,
+                gg,
+                eta_i,
+                beta,
+                False if beta < beta_thresh else True,
+            ),
             tol_epi,
         )
         solver.set_param("verbosity", 1)
@@ -481,7 +525,8 @@ def J2(refl_mon, tran_mon):
             input_flux,
             input_flux_data,
             True,  # use_damping
-            False if beta <= beta_thresh else True,  # use_epsavg
+            False if beta < beta_thresh else True,  # use_epsavg
+            beta,
         )
 
         # apply the minimum linewidth constraint
@@ -492,7 +537,12 @@ def J2(refl_mon, tran_mon):
             grd = np.zeros((2, n + 1))
             t = glc(res, x, grd, beta)
             solver.add_inequality_mconstraint(
-                lambda rr, xx, gg: glc(rr, xx, gg, beta),
+                lambda rr, xx, gg: glc(
+                    rr,
+                    xx,
+                    gg,
+                    beta,
+                ),
                 tol_lw,
             )
 
@@ -502,16 +552,20 @@ def J2(refl_mon, tran_mon):
         # function over the six wavelengths and the lengthscale
         # constraint (final epoch only).
         t0 = opt(
-            [mapping(x[1:], eta_i, beta)],
+            [
+                mapping(
+                    x[1:],
+                    eta_i,
+                    beta if beta < beta_thresh else 0,
+                ),
+            ],
             need_gradient=False,
         )
         t0 = np.concatenate((t0[0][0], t0[0][1]))
+        t0_str = "[" + ",".join(str(tt) for tt in t0) + "]"
         x[0] = np.amax(t0)
-        if beta == betas[-1]:
-            x[0] = 1.05 * max(x[0], t)
-        else:
-            x[0] = 1.05 * x[0]
-        print(f"data:, {beta}, {t0}, {x[0]}")
+        x[0] = 1.05 * (max(x[0], t) if beta == betas[-1] else x[0])
+        print(f"data:, {beta}, {t0_str}, {x[0]}")
 
         x[:] = solver.optimize(x)
 
@@ -536,9 +590,9 @@ def J2(refl_mon, tran_mon):
                 dpi=150,
                 bbox_inches="tight",
             )
-            # save the final design (unmapped) as a 2d array in CSV format
+            # save the final (unmapped) design as a 2d array in CSV format
             np.savetxt(
-                f"design_weights_beta{beta}.csv",
+                f"unmapped_design_weights_beta{beta}.csv",
                 x[1:].reshape(Nx, Ny),
                 fmt="%4.2f",
                 delimiter=",",