-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.tex
2730 lines (2631 loc) · 166 KB
/
main.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
\documentclass[11pt,a4paper]{article}
% \documentclass[11pt]{amsart}
\input{preamble.tex}
% BODY {{{1
\date{\today}
\title{Mobility estimation for Langevin dynamics using control variates}
\author{%
G.A. Pavliotis\thanks{Department of Mathematics, Imperial College London (\email{g.pavliotis@imperial.ac.uk})}%
\hspace{2mm}\orcid{0000-0002-3468-9227}%
\and G. Stoltz\thanks{CERMICS, \'Ecole des Ponts, France \& MATHERIALS, Inria Paris (\email{gabriel.stoltz@enpc.fr})}
\hspace{2mm}\orcid{0000-0002-2797-5938}%
\and U. Vaes\thanks{MATHERIALS, Inria Paris (\email{urbain.vaes@inria.fr})}%
\hspace{2mm}\orcid{0000-0002-7629-7184}
}
\begin{document}
\maketitle
\begin{abstract}
The scaling of the mobility of two-dimensional Langevin dynamics in a periodic potential as the friction vanishes is not well understood for non-separable potentials.
Theoretical results are lacking,
and numerical calculation of the mobility in the underdamped regime is challenging because
the computational cost of standard Monte Carlo methods is inversely proportional to the friction coefficient,
while deterministic methods are ill-conditioned.
In this work, we propose a new variance-reduction method based on control variates for efficiently estimating the mobility of Langevin-type dynamics.
We provide bounds on the bias and variance of the proposed estimator,
and illustrate its efficacy through numerical experiments,
first in simple one-dimensional settings
and then for two-dimensional Langevin dynamics.
Our results corroborate previous numerical evidence that
the mobility scales as~$\gamma^{-\sigma}$, with~$0 < \sigma \leq 1$,
in the low friction regime for a simple non-separable potential.
\end{abstract}
% \begin{itemize}
% \item \textcolor{red}{Proof of \cref{proposition:semigroup_meanzero_observable} not indempotent?}
% \end{itemize}
\section{Introduction}%
\label{sec:introduction}
Langevin dynamics model the evolution of a system of particles interacting with an environment at fixed temperature.
They are widely used for the calculation of macroscopic properties of matter in molecular simulation~\cite{MR2723222,allen2017computer}.
Assuming a diagonal mass matrix,
the standard Langevin dynamics, sometimes called underdamped Langevin dynamics,
reads after appropriate non-dimensionalization~\cite[Section 2.2.4]{MR2681239}
\begin{subequations}
\label{eq:langevin}
\begin{align}
\label{eq:langevin_q}
\d \vect q_t &= \vect p_t \, \d t, \\
\label{eq:langevin_p}
\d \vect p_t &= - \grad V(\vect q_t) \, \d t - \gamma \, \vect p_t \, \d t + \sqrt{2 \gamma \beta^{-1}} \, \d \vect w_t.
\end{align}
\end{subequations}
Here, $\vect q_t \in \torus^d$ and $\vect p_t \in \real^d$ are the position and velocity variables,
with~$\torus^d = \real^d / 2\pi \integer^d$ the $d$-dimensional torus with period $2 \pi$.
Throughout this work, we emphasize vectorial quantities in bold.
The parameter $\gamma > 0$ is a dimensionless parameter called friction,
$\beta > 0$ is inversely proportional to the temperature,
$V$ is a smooth periodic potential
and~$\vect w_t$ is a standard $d$-dimensional Brownian motion.
% $M$ is the mass matrix,
% For simplicity we assume that $M = m I_d$,
% where $I_d \in \real^{d \times d}$ is the identity matrix.
% In this case $(\widetilde q_t, \widetilde p_t) := (q_{\sqrt{m} t}, m^{-1/2} p_{\sqrt{m} t})$
% is a weak solution of~\eqref{eq:langevin} with $m = 1$ and $\gamma$ replaced by $\gamma/\sqrt{m}$,
% so to further simplify we assume $m = 1$,
% keeping in mind that asymptotic results for the case $m \neq 1$ can be deduced from this transformation;
% see~\cite[Section 2.2.4]{MR2681239} for a more detailed motivation of this simplification.
% keeping in mind that results obtained in the limit;
The dynamics~\eqref{eq:langevin} is ergodic with respect to the Boltzmann--Gibbs probability measure
\begin{equation}
\label{eq:invariant_measure}
\mu(\d \vect q \, \d \vect p) = \frac{1}{Z} \exp \bigl( - \beta H(\vect q, \vect p) \bigr) \, \d \vect q \, \d \vect p,
\qquad H(\vect q, \vect p) = V(\vect q) + \frac{\abs{\vect p}^2}{2},
\end{equation}
with $Z< \infty$ the normalization constant.
It will be convenient to also introduce the marginal distributions
\begin{equation}
\label{eq:definition_prob_measures}
\nu(\d \vect q) = \frac{\e^{- \beta V(\vect q)} \, \d \vect q}{\int_{\torus^d}\e^{-\beta V}},
\qquad \kappa(\d \vect p) = \left( \frac{\beta}{2 \pi} \right)^{d/2}\exp \biggl( - \beta \frac{\abs*{\vect p}^2}{2} \biggr) \d \vect p.
\end{equation}
\paragraph{Definition of the mobility.}
The mobility in the direction $\vect e \in \real^d$ (with $|\vect e| = 1$)
for the dynamics~\eqref{eq:langevin} provides information on the behavior of the system
in response to an external forcing $\eta \vect e$ with magnitude~$\eta$ on the velocity process.
By analogy with macroscopic laws,
it is defined as the proportionality constant,
in the limit of a small forcing,
between the induced average velocity and the strength of the forcing.
More precisely,
the mobility in the direction $\vect e$ is defined mathematically as
\begin{equation}
\label{eq:relation_mobility_diffusion}
M^{\gamma}_{\vect e} = \lim_{\eta \to 0} \frac{1}{\eta}\expect_{\mu_{\eta}} [\vect e^\t \vect p] ,
\end{equation}
where $\mu_{\eta}$ is the invariant probability distribution of~\eqref{eq:langevin} when
an additional drift term $\eta \vect e$ is present on the right-hand side of~\eqref{eq:langevin_p}.
Let us emphasize that this additional drift term is not the gradient of a smooth periodic potential.
Nonetheless, it is possible to show that the probability measure $\mu_{\eta}$ exists and is uniquely defined,
and that the limit in~\eqref{eq:relation_mobility_diffusion} is well-defined;
see~\cite[Section 5]{MR3509213}.
Except when $\eta = 0$, in which case we recover~\eqref{eq:invariant_measure},
the measure~$\mu_{\eta}$ is not known explicitly,
and so $M_{\vect e}^{\gamma}$ cannot be obtained simply by numerical integration of the observable $\vect e^\t \vect p$ with respect to this measure.
It is well known,
based on the seminal works of Sutherland~\cite{sutherland1905lxxv}, Einstein~\cite{einstein1905molekularkinetischen} and Smoluchowski~\cite{von1906kinetischen} in the early 1900s,
that the mobility coincides
(up to the factor $\beta$)
with the so-called effective diffusion coefficient associated with the dynamics,
which opens the door to the simple Monte Carlo approach based on~\eqref{eq:naive_estimator} below for its estimation.
This link between mobility and diffusion,
known as Einstein's relation,
is made precise in the next paragraph,
where we also define the effective diffusion coefficient precisely.
% The equation relating mobility and diffusion is called Einstein's relation,
For a rigorous justification of Einstein's relation in the specific setting of the Langevin dynamics~\eqref{eq:langevin},
we refer to~\cite[Section~5.2]{MR3509213}; see also \cite[Section~3]{LMS16} and~\cite[Chapter~9]{pavliotis2011applied}.
% It can be shown, using the characterization in terms of the solution to a Poisson equation
% that the function $\vect e \mapsto M^{\gamma}_{\vect e}$ is a quadratic function of $
% Employing an expansion of~$\mu_{\eta}$ in powers of~$\eta$,
% which can be justified rigorously for sufficiently small $\eta$~\cite[Section~5.2]{MR3509213},
% it is possible to show that the function $\vect e \mapsto M^{\gamma}_{\vect e}$ is quadratic,
% i.e.\ that $M^{\gamma}_{\vect e} = \vect e^\t \mat M^{\gamma} \vect e$ for an appropriate symmetric matrix $\mat M^{\gamma} \in \real^{d \times d}$.
% Notice that the entries of this matrix can be calculated from the directional mobility by using the identity
% \[
% 4 \vect e_1^\t \mat M^{\gamma} \vect e_2
% = (\vect e_1 + \vect e_2) ^\t \mat M^{\gamma} (\vect e_1 + \vect e_2)
% - (\vect e_1 - \vect e_2) ^\t \mat M^{\gamma} (\vect e_1 - \vect e_2)
% = M^{\gamma}_{\vect e_1 + \vect e_2} - M^{\gamma}_{\vect e_1 - \vect e_2}.
% \]
\paragraph{Effective diffusion.}
The concept of effective diffusion,
for the Langevin dynamics~\eqref{eq:langevin},
refers to the following functional central limit theorem:
the diffusively rescaled position process $(\varepsilon \vect q_{t/\varepsilon^2})_{t\geq0}$ converges as $\varepsilon \to 0$,
weakly in the space of continuous functions over compact time intervals,
to a Brownian motion in $\real^d$ with a matrix prefactor~$\sqrt{2 \mat D^{\gamma}}$.
The matrix $\mat D^{\gamma}$ is known as the effective diffusion matrix associated with the dynamics.
This result may be obtained by using the homogenization technique pioneered by Bhattacharya in~\cite{MR663900},
which hinges on the functional central limit theorem for martingales~\cite{MR668684};
see also~\cite[Chapter~3]{MR503330} for early results concerning the asymptotic analysis of SDEs,
the book~\cite[Chapter 18]{pavliotis2008multiscale} for a pedagogical presentation of homogenization for stochastic differential equations,
and~\cite[Theorem 2.5]{MR2793823} for a detailed proof of the homogenization theorem for the Markovian approximation of the generalized Langevin equation.
The precise statement of Einstein's relation is then that
\[
D^{\gamma}_{\vect e} := \vect e^\t \mat D^{\gamma} \vect e = \beta M^{\gamma}_{\vect e}.
\]
\paragraph{Link with the Poisson equation.}
The effective diffusion coefficient
can be expressed in terms of the solution to a partial differential equation (PDE) involving the generator of the Markov semigroup associated with~\eqref{eq:langevin},
which is given by
\begin{equation}
\label{eq:decomposition_generator}
\mathcal L
= \vect p \cdot \grad_{\vect q} - \grad V \cdot \grad_{\vect p} + \gamma \left( - \vect p \cdot \grad_{\vect p} + \beta^{-1} \laplacian_{\vect p} \right)
=: \mathcal L_{\rm Ham} + \gamma \mathcal L_{\rm FD}.
\end{equation}
Specifically, it is possible to show~\cite{MR663900} that
\begin{equation}
\label{eq:effective_diffusion_poisson}
D^{\gamma}_{\vect e} = \ip{\phi_{\vect e}}{\vect e^\t \vect p},
\end{equation}
where $\phi_{\vect e}$ denotes the unique solution to the Poisson equation
\begin{equation}
\label{eq:poisson_equation}
- \mathcal L \phi_{\vect e} = \vect e^\t \vect p,
\qquad \phi_{\vect e} \in L^2_0(\mu) := \bigl\{ u \in L^2(\mu): \ip{u}{1} = 0 \bigr\}.
\end{equation}
Throughout this work,
$\ip{\dummy}{\dummy}$ and $\norm{\dummy}$ denote respectively the inner product and norm of $\lp{2}{\mu}$
unless otherwise specified.
Several techniques can be employed in order to show that~\eqref{eq:poisson_equation} admits a unique solution in $L^2_0(\mu)$
for any right-hand side in $L^2_0(\mu)$:
one may use the approach employed in~\cite[Proposition 5.1]{MR2793823},
which is itself inspired from~\cite[Lemma 2.1]{MR812349},
or obtain well-posedness as a corollary of the exponential decay in $L^2(\mu)$ of the Markov semigroup associated with the dynamics,
as in~\cite[Corollary 1]{roussel2018spectral}.
See also~\cite{Herau06,MR3106879,MR3522857,BFLS20} for other references on the exponential decay for semigroups with a hypocoercive generator.
\paragraph{Numerical estimation of the mobility.}
In spatial dimension 1,
it is possible to obtain an accurate estimation of the effective diffusion coefficient by solving the Poisson equation~\eqref{eq:poisson_equation} using a deterministic method~\cite{roussel2018spectral},
but this approach is generally too computationally expensive in higher dimensions.
In spatial dimension~2, for example,
a spectral discretization of~\eqref{eq:poisson_equation} based on a tensorized basis of functions,
with say~$N$ degrees of freedom per dimension of the state space $\torus^2 \times \real^2$, leads to a linear system with~$N^4$ unknowns,
% (because the dimension of the state space for $(\vect q, \vect p)$ is~4),
which is computationally intractable for large values of $N$.
In this setting, probabilistic methods offer an attractive alternative.
% As will be justified rigorously in \cref{sec:method},
It follows from the definition of~$D^{\gamma}_{\vect e}$ that,
for any $t > 0$,%
\begin{equation}
\label{eq:einsteins_formula}
D^{\gamma}_{\vect e}
=\lim_{\varepsilon \to 0} \frac{\expect\Bigl[\bigl\lvert \vect e^\t \left(\varepsilon \vect q_{t/\varepsilon^2} - \varepsilon \vect q_0\right) \bigr\rvert^2\Bigr]}{2 t}
=\lim_{T \to \infty} \frac{\expect \Bigl[\bigl\lvert \vect e^\t \left(\vect q_T - \vect q_0\right) \bigr\rvert^2\Bigr]}{2T},
\end{equation}
suggesting that this coefficient may be calculated by
estimating the mean square displacement at a sufficiently large time of the equilibrium dynamics~\eqref{eq:langevin}
using Monte Carlo simulation,
which is one of the approaches taken in~\cite{MR2427108}.
Specifically, given a number $J$ of realizations of the dynamics~\eqref{eq:langevin} over a sufficiently long time interval $[0, T]$,
the effective diffusion coefficient in direction $\vect e$ may be estimated as
\begin{equation}
\label{eq:naive_estimator}
\widehat D^{\gamma}_{\vect e}
= \frac{1}{J} \sum_{j=1}^{J} \frac{\left\lvert \vect e^\t \left(\vect q^{(j)}_T - \vect q^{(j)}_0\right) \right\rvert^2}{2T},
\end{equation}
where $(\vect q_t^{(j)}, \vect p_t^{(j)})_{t \geq 0}$, for $1 \leq j \leq J$,
are independent realizations of the solution to the Langevin equation~\eqref{eq:langevin} starting from i.i.d.\ initial conditions $\bigl(\vect q^{(j)}_0, \vect p^{(j)}_0\bigr)$.
The variance reduction approach we propose in the next section aims at reducing the mean square error of estimators of this type.
Another possible approach for estimating the mobility is to rely on a numerical approximation of the Green--Kubo formula;
see \cite[Section 5.1.3]{MR3509213} for general background information on this subject.
The bias associated with this approach is studied carefully in~\cite{LMS16},
and bounds on the variance are obtained in~\cite{PSW21},
showing that the variance increases linearly with the integration time over which correlations are computed.
In practice, choosing the integration time is a delicate task:
it needs to be sufficiently large to ensure that the systematic bias is small,
but not too large, or else the variance of the resulting estimator is large.
The technical challenges impeding adoption of the Green--Kubo formalism,
as well as some solutions to overcome these in the context of heat transport,
are discussed in~\cite{ercole2017accurate,Baroni2020}.
\paragraph{Overdamped and underdamped limits.}
The behavior of the Langevin dynamics~\eqref{eq:langevin} depends on the value of the friction parameter~$\gamma$.
The overdamped limit $\gamma \to \infty$ is well understood;
in this limit, the rescaled position process $(\vect q_{\gamma t})_{t \geq 0}$
converges, weakly in the space of continuous functions~\cite{MR4054345}
and almost surely uniformly over compact subintervals of $[0, \infty)$~\cite[Theorem 10.1]{MR0214150},
to the solution of the overdamped Langevin equation
\begin{equation}
\label{eq:overdamped_langevin}
\d \vect q_t = - \grad V(\vect q_t) \, \d t + \sqrt{2 \beta^{-1}} \, \d \vect b_t,
\end{equation}
where $\vect b_t$ is another standard $d$-dimensional Brownian motion.
It is also possible to prove that~$\gamma \mat D^{\gamma} = \mat D^{\rm ovd} + \bigo{\gamma^{-2}}$ as $\gamma \to \infty$,
where~$\mat D^{\rm ovd}$ is the effective diffusion coefficient of overdamped Langevin dynamics,
and to derive explicit expressions for the correction terms by asymptotic analysis~\cite{MR2394704}.
The diffusion coefficient in the overdamped limit is given by $\vect e^\t \mat D^{\rm ovd} \vect e = \norm{\vect e + \grad \chi_{\vect e}}[L^2(\nu)]^2$,
where $\chi_{\vect e}$ is the unique solution in~$L^2_0(\nu)$ to the Poisson equation
\[
- \mathcal L_{\rm ovd} \chi_{\vect e} = - \vect e^\t \grad V(\vect q), \qquad \mathcal L_{\rm ovd} = - \grad V \cdot \grad + \beta^{-1} \laplacian,
\]
where $\mathcal L_{\rm ovd}$ is the generator of the Markov semigroup associated with~\eqref{eq:overdamped_langevin}.
The reasoning in~\cite[Proposition 4.1]{MR2394704},
when appropriately generalized to the multi-dimensional setting,
shows that~$D^{\rm ovd}_{\vect e}$ is in fact an upper bound for $\gamma D^{\gamma}_{\vect e}$ for all $\gamma > 0$.
The underdamped limit is much more difficult to analyze,
especially in the multi-dimensional setting.
In spatial dimension one, it was shown in~\cite{MR2394704} that $\gamma D^{\gamma} \to D^{\rm und}$ as $\gamma \to 0$ for some limit~$D^{\rm und}$
that is also a lower bound for $\gamma D^{\gamma}$ for all $\gamma > 0$.
It is also possible~\cite[Lemma~3.4]{MR2394704}, in this case,
to show that the solution to the Poisson equation~\eqref{eq:poisson_equation},
when multiplied by $\gamma$, converges in~$\lp{2}{\mu}$ as $\gamma \to 0$ to a limit
which can be calculated explicitly in simple settings~\cite{MR2427108}.
Despite the existence of an asymptotic result,
calculating the mobility for small $\gamma$ is challenging.
Indeed, it can be shown that the spectral gap in $L^2(\mu)$ of the generator $\mathcal L$ behaves as $\mathcal O(\gamma)$ in the limit as $\gamma \to 0$~\cite{MR2394704,MR3106879,MR3522857,roussel2018spectral},
and so deterministic methods for solving the Poisson equation~\eqref{eq:poisson_equation} are ill-conditioned in this limit,
while Monte Carlo based methods are very slow to converge,
as discussed in \cref{sec:method}.
The aforementioned asymptotic result for the underdamped limit extends to the multi-dimensional setting only when the potential is separable,
that is when $V$ can be decomposed as $V(\vect q) = \sum_{i=1}^d V_i(q_i)$, corresponding to a completely integrable Hamiltonian system for $\gamma =0$, but no theoretical results exist in the non-separable case,
which was explored mostly by means of numerical experiments.
Early numerical results in~\cite{chen1996surface}, obtained from Einstein's formula~\eqref{eq:einsteins_formula},
suggest that the effective diffusion coefficient scales as~$\gamma^{-1/2}$ in the underdamped regime for a particular case of a non-separable periodic potential.
Later, in~\cite{Braun02},
different authors note that this behavior as~$\gamma^{-1/2}$ is valid only when $\gamma \in [0.01, 0.1]$,
but not for smaller values of the damping coefficient.
They conclude from simulation results that the effective diffusion coefficient scales as~$\gamma^{-\sigma}$ with $0 \leq \sigma \leq 1/3$ in the underdamped regime,
and suggest that $\sigma$ could be zero for all non-separable potentials.
More recently, in his doctoral thesis~\cite{roussel_thesis},
Roussel calculates the mobility of Langevin dynamics using a control variate approach for linear response,
relying on~\eqref{eq:relation_mobility_diffusion}.
The control variate he employs is constructed from an approximate solution to the Poisson equation~\eqref{eq:poisson_equation}.
His results suggest that, for a wide range of friction coefficients in the interval $[10^{-3}, 1]$
and in the particular case of the potential
\begin{equation}
\label{eq:potential_julien}
V(\vect q) = - \bigl( \cos(q_1) + \cos(q_2) \bigr) + \delta \exp \bigl(\sin(q_1 + q_2)\bigr),
\end{equation}
the mobility scales as $\gamma^{- \sigma}$,
with an exponent $\sigma \in [0, 1]$ that depends on the degree $\delta$ of non-separability of the potential. Despite claims in the physics literature, it is not expected that a universal scaling of mobility, or, equivalently, the effective diffusion coefficient, exists for general classes of non-separable potentials in dimensions higher than one.
\paragraph{Our contributions.}
In this work,
we propose a new variance reduction methodology for calculating the mobility of Langevin-type dynamics.
Like the approach in~\cite{roussel_thesis},
our methodology is based on a control variate constructed from an approximate solution to the Poisson equation~\eqref{eq:poisson_equation},
but it relies on Einstein's formula~\eqref{eq:einsteins_formula} instead of the linear response result~\eqref{eq:relation_mobility_diffusion}.
The advantages of relying on Einstein's formula are twofold:
on the one hand the associated estimators, which are based on~\eqref{eq:naive_estimator},
are asymptotically unbiased,
and on the other hand,
their calculation requires only the first derivatives of the approximate solution to the Poisson equation,
which makes it possible to circumvent regularity issues encountered in~\cite{roussel_thesis} in the underdamped limit.
% A downside of estimators based on~\eqref{eq:naive_estimator}, however,
% is that their variance does not vanish as the simulation time increases,
% and so a large number of independent replicas are required in order to reduce statistical errors.
% The control variate based on an approximate solution to~\eqref{eq:poisson_equation} is a building block of the methodology we propose for improving Monte Carlo estimators based on~\eqref{eq:naive_estimator},
% but the converse viewpoint can also be adopted:
% with our approach or that used in \cite{roussel_thesis},
% Monte Carlo simulations enable to refine the estimation of the effective diffusion coefficient associated with the approximate solution through~\eqref{eq:effective_diffusion_poisson}.
% That is, approximate solutions to the Poisson equation~\eqref{eq:poisson_equation},
% obtained for example through a Galerkin method,
% can be leveraged even if they are inaccurate.
Our contributions in this work are the following.
\begin{itemize}
\item
We derive bounds on the bias and variance of the proposed estimator for the simple case of one-dimensional Langevin dynamics,
in terms of the error on the solution to the Poisson equation~\eqref{eq:poisson_equation}.
Our estimates show, in particular, that the Langevin dynamics should be integrated up to a time scaling as $\max(\gamma^{-1}, \gamma)$ in order to control the bias of the estimator.
\item
We examine the performance of the approach for two different approximate solutions to the Poisson equation:
one is obtained through the Fourier/Hermite Galerkin method developed in~\cite{roussel2018spectral},
and the other is calculated from the limiting solution of the Poisson equation in the underdamped limit;
see~\cite{MR2427108}.
\item
We apply the proposed variance reduction approach to the estimation of mobility for two-dimensional Langevin dynamics in a non-separable periodic potential.
To this end, we construct an approximation to the Poisson equation by tensorization of approximations obtained in one spatial dimension.
We numerically study the performance of this approach,
and present numerical results corroborating the asymptotic behavior as $\gamma^{-\sigma}$ for $\sigma \in (0, 1]$ of the effective diffusion coefficient
observed in~\cite{roussel_thesis}.
\item
Using the proposed variance reduction approach
for calculating the diffusion coefficient of generalized Langevin dynamics in the underdamped regime,
we provide numerical evidence supporting the asymptotic behavior of the effective diffusion coefficient conjectured in our previous work~\cite{GPGSUV21} using formal asymptotics.
\end{itemize}
The rest of the paper is organized as follows.
In~\cref{sec:method},
we present a control variate approach for improving the naive Monte Carlo estimator~\eqref{eq:naive_estimator},
and obtain bounds on the bias and variance of the improved estimator in the particular case of Langevin dynamics~\eqref{eq:langevin}.
In~\cref{sec:application_to_one_dimensional_langevin_type_dynamics},
we employ the proposed approach for calculating the mobility of one-dimensional Langevin and generalized Langevin dynamics,
as a proof of concept,
and we assess the performance of various control variates in terms of variance reduction.
In~\cref{sec:applications_2d},
we present numerical results for two-dimensional Langevin dynamics,
exhibiting a scaling as $\gamma^{-\sigma}$ of the mobility in the underdamped regime.
\Cref{sec:conclusions_and_perspectives_for_future_work} is reserved for conclusions and perspectives for future work,
while the appendices contain technical results employed in~\cref{sec:application_to_one_dimensional_langevin_type_dynamics}.
\section{Improved Monte Carlo estimator for the diffusion coefficient}%
\label{sec:method}%
Throughout this section,
we focus on the Langevin dynamics~\eqref{eq:langevin} for simplicity.
Although some of our arguments are tailored specifically to this dynamics,
our approach may in principle be applied to other Langevin-type dynamics,
such as the generalized Langevin dynamics considered in \cref{sub:generalization_to_generalized_langevin_dynamics}.
We assume throughout the section that $(\vect q_t, \vect p_t)_{t\geq 0}$ is a solution of~\eqref{eq:langevin} with statistically stationary initial condition~$(\vect q_0, \vect p_0) \sim \mu$ independent of the Brownian motion $(\vect w_t)_{t \geq 0}$.
This is not a restrictive assumption in our setting as the probability measure $\mu$,
being defined explicitly on the low-dimensional space $\torus^d \times \real^d$,
can be sampled efficiently using standard methods,
for instance by rejection sampling.
Let us fix a direction $\vect e \in \real^d$, with $|\vect e| = 1$,
and denote again by $\phi_{\vect e}$ the corresponding solution to the Poisson equation~\eqref{eq:poisson_equation}.
Since the number of independent realizations in Monte Carlo estimators
appears only as a denominator in the variance,
we study estimators based on one realization only.
That is, instead of~\eqref{eq:naive_estimator}, we take as point of comparison the naive estimator
\begin{equation}
\label{eq:simple_estimator}
u(T) = \frac{\abs{\vect e^\t (\vect q_T - \vect q_0)}^2}{2T}.
\end{equation}
This section is divided into three parts.
In \cref{sub:construction_of_an_improved_estimator},
we construct a Monte Carlo estimator for the effective diffusion coefficient that improves on~\eqref{eq:simple_estimator}.
We then demonstrate in \cref{sub:bias} and \cref{sub:variance} that,
at least in certain parameter regimes,
this estimator has better properties than~\eqref{eq:simple_estimator} in terms of bias and variance, respectively.
\subsection{Construction of an improved estimator}%
\label{sub:construction_of_an_improved_estimator}
In order to motivate the construction of an improved estimator for $D^{\gamma}_{\vect e}$,
we apply It\^o's formula to the solution $\phi_{\vect e}$ to the Poisson equation~\eqref{eq:poisson_equation},
which gives
\[
\phi_{\vect e}(\vect q_T, \vect p_T) - \phi_{\vect e}(\vect q_0, \vect p_0)
= - \int_{0}^{T} \vect e^\t \vect p_t \, \d t + \sqrt{2 \gamma \beta^{-1}} \int_{0}^{T} \grad_{\vect p} \phi_{\vect e}(\vect q_t, \vect p_t) \cdot \d \vect w_t.
\]
Rearranging the terms,
we obtain
\begin{equation}
\label{eq:ito_for_phi}
\vect e^\t(\vect q_T - \vect q_0) =
\phi_{\vect e}(\vect q_0, \vect p_0) - \phi_{\vect e}(\vect q_T, \vect p_T)
+ \sqrt{2 \gamma \beta^{-1}} \int_{0}^{T} \grad_{\vect p} \phi_{\vect e}(\vect q_t, \vect p_t) \cdot \d \vect w_t.
\end{equation}
% Let us assume that $\psi_{\vect e}$ is an approximation of $\phi_{\vect e}$,
The estimator we propose requires the knowledge of an approximation $\psi_{\vect e}$ of the solution $\phi_{\vect e}$ to the Poisson equation~\eqref{eq:poisson_equation}.
Two concrete methods for obtaining such an approximation in the small $\gamma$ regime are presented in \cref{sec:application_to_one_dimensional_langevin_type_dynamics}.
In this section, we assume that such an approximation is given.
Let us introduce
\begin{align}
\label{eq:definition_control_variate}
\xi_T = \psi_{\vect e}(\vect q_0, \vect p_0) - \psi_{\vect e}(\vect q_T, \vect p_T)
+ \sqrt{2 \gamma \beta^{-1}} \int_{0}^{T} \grad_{\vect p} \psi_{\vect e}(\vect q_t, \vect p_t) \cdot \d \vect w_t.
\end{align}
\begin{remark}
By It\^o's formula,
it would have been equivalent, in the case where $\psi_{\vect e}$ is smooth,
to define
\(
\xi_T = \int_{0}^{T} \mathcal L \psi_{\vect e}(\vect q_t, \vect p_t) \, \d t.
\)
However, the definition~\eqref{eq:definition_control_variate} makes sense even if $\psi_{\vect e}$ is differentiable only once,
and so it is more widely applicable.
In~\cref{sub:underdamped_approach}, for example, we construct a singular approximation~$\psi_{\vect e}$ that is not twice weakly differentiable.
% but in~\cref{sec:application_to_one_dimensional_langevin_type_dynamics} we will employ approximations $\psi_{\vect e}$ which are differentiable only once.
\end{remark}
Since $\xi_T$ is expected to be a good approximation of $\vect e^\t(\vect q_T - \vect q_0)$,
in some appropriate sense,
when~$\psi_{\vect e}$ is a good approximation of~$\phi_{\vect e}$,
one may achieve a reduction in variance by using the former as a control variate for the latter.
More precisely, we consider the following estimator instead of~$u(T)$:
\begin{equation}
\label{eq:simple_estimator_improvement_1}
\frac{\bigl\lvert \vect e^\t (\vect q_T - \vect q_0) \bigr\rvert^2}{2T} - \alpha \left( \frac{\bigl\lvert \xi_T \bigr\rvert^2}{2T} - \expect \left[ \frac{\lvert \xi_T \rvert^2}{2T} \right] \right)
=: u(T) - \alpha \Bigl(\widehat u(T) - \expect \left[\widehat u(T)\right]\Bigr).
\end{equation}
Clearly, this estimator and $u(T)$ have the same expectation, and thus the same bias.
By standard properties of control variates~\cite{kroese2013handbook},
the value of $\alpha$ minimizing the variance can be expressed in terms of the variance of $u(T)$ and
the covariance between $u(T)$ and $\widehat u(T)$.
For simplicity of the analysis,
we consider only the case $\alpha = 1$,
which is the variance-minimizing choice when~$\widehat u(T) = u(T)$.
% and it yields good results as long as $\widehat u(T)$ is a good approximation of~$u(T)$.
We mention in passing that the idea of constructing control variates by means of approximate solutions of an appropriate Poisson equation forms the basis of the so-called zero variance Markov Chain Monte Carlo methodology~\cite{papamarkou_al_2014}.
The estimator can be further modified by replacing the expectation in~\eqref{eq:simple_estimator_improvement_1},
which is intractable analytically,
by its value in the limit as $T \to \infty$;
that is, we define
\begin{equation}
\label{eq:improved_estimator}
v(T) = \frac{\bigl\lvert \vect e^\t(\vect q_T - \vect q_0) \bigr\rvert^2}{2T} - \frac{\bigl\lvert \xi_T \bigr\rvert^2}{2T} + \lim_{T \to \infty} \expect \left[\frac{\bigl\lvert \xi_T \bigr\rvert^2}{2T}\right].
\end{equation}
Note that $v(T) = u(T)$ if $\psi_{\vect e} = 0$.
The expectation of $v(T)$ is different from that of $u(T)$,
but the two expectations coincide asymptotically as $T \to \infty$.
Furthermore, unlike the expectation in~\eqref{eq:simple_estimator_improvement_1},
the limit in the last term on the right-hand side of~\eqref{eq:improved_estimator} can be calculated explicitly,
and so the estimator $v(T)$ can be employed in practice.
\begin{lemma}
\label{lemma:explicit_limit}
Assume that $\psi_{\vect e} \in L^2(\mu)$ and $\grad_{\vect p} \psi_{\vect e} \in L^2(\mu)$.
Then
\begin{equation}
\label{eq:explicit_limit}
\lim_{T \to \infty} \expect \left[\frac{\bigl\lvert \xi_T \bigr\rvert^2}{2T}\right] = \gamma \beta^{-1} \int_{\torus^d \times \real^d} \abs{\grad_{\vect p} \psi_{\vect e}}^2 \, \d \mu =: d[\psi_{\vect e}].
\end{equation}
\end{lemma}
\begin{proof}
% By It\^o's formula we can rewrite~\eqref{eq:definition_control_variate} as $\xi_T = \int_{0}^{T} \mathcal L \psi_{\vect e}(\vect q_t, \vect p_t) \, \d t$,
% so~\eqref{eq:explicit_limit} follows from a central limit theorem for additive functionals of Markov diffusion processes~\cite{MR663900,MR3069369}.
% The idea of the proof, given here because it contains manipulations used throughout this section, is as follows.
Let us introduce the notation
\[
\theta_T = \psi_{\vect e}(\vect q_0, \vect p_0) - \psi_{\vect e}(\vect q_T, \vect p_T),
\qquad
M_T = \sqrt{2 \gamma \beta^{-1}} \int_{0}^{T} \grad_{\vect p} \psi_{\vect e}(\vect q_t, \vect p_t) \cdot \d \vect w_t.
\]
% The central limit theorem for martingales~\cite{MR668684} implies the convergence in distribution of $M_T/\sqrt{T}$ to a mean-zero Gaussian random variable.
From the definition~\eqref{eq:definition_control_variate},
we have
\[
\frac{\lvert \xi_T \rvert^2}{2T} = \frac{\theta_T^2}{2T} + \frac{M_T^2}{2T} + \left(\frac{\theta_T}{\sqrt{T}}\right) \left(\frac{M_T}{\sqrt{T}}\right).
\]
Given that $\psi_{\vect e} \in L^2(\mu)$ and that we assume stationary initial conditions,
so that $(\vect q_T, \vect p_T) \sim \mu$ as well,
the expectation of the first term tends to 0 in the limit as $T \to \infty$.
The expectation of the second term can be calculated from It\^o's isometry:
\[
\expect \left[ \frac{M_T^2}{2T} \right]
= \frac{\gamma \beta^{-1}}{T} \int_{0}^{T} \expect \Bigl[\left\lvert \grad_{\vect p} \psi_{\vect e}(\vect q_t, \vect p_t) \right\rvert^2\Bigr] \, \d t
= \gamma \beta^{-1} \int_{\torus^d \times \real^d} \left\lvert \grad_{\vect p} \psi_{\vect e}(\vect q, \vect p) \right\rvert^2 \, \d \mu
= d[\psi_{\vect e}].
\]
The expectation of the third term converges to zero by the Cauchy--Schwarz inequality,
which concludes the proof of~\eqref{eq:explicit_limit}.
\end{proof}
Repeating verbatim the reasoning in the proof of~\cref{lemma:explicit_limit} with~$\phi_{\vect e}$ instead of $\psi_{\vect e}$ and $\vect e^\t (\vect q_T - \vect q_0)$ instead of $\xi_T$
(see~\eqref{eq:ito_for_phi}),
we obtain that
\[
\lim_{T \to \infty} \expect [u(T)] = d[\phi_{\vect e}],
\]
implying that $d[\phi_{\vect e}] = D^{\gamma}_{\vect e}$,
since the limit on the left-hand side of this equation is by definition~$D^{\gamma}_{\vect e}$ in view of~\eqref{eq:einsteins_formula}.
This equality can also be shown from~\eqref{eq:effective_diffusion_poisson} by integrating by parts in the formula for $d[\phi_{\vect e}]$:
\begin{align}
\notag
d[\phi_{\vect e}]
&= \gamma \beta^{-1} \int_{\torus^d \times \real^d} \abs{\grad_{\vect p} \phi_{\vect e}}^2 \, \d \mu
= \gamma \beta^{-1} \int_{\torus^d \times \real^d} \bigl((\beta \grad V - \grad_{\vect p}) \cdot \grad_{\vect p}\phi_{\vect e} \bigr) \phi_{\vect e} \, \d \mu \\
\label{eq:equivalent_definition_effective_diffusion}
&= -\int_{\torus^d \times \real^d} (\gamma \mathcal L_{\rm FD} \phi_{\vect e}) \phi_{\vect e} \, \d \mu
= -\int_{\torus^d \times \real^d} (\mathcal L \phi_{\vect e}) \phi_{\vect e} \, \d \mu
= D^{\gamma}_{\vect e},
\end{align}
where the skew-symmetry of $\mathcal L_{\rm ham}$ in $L^2(\mu)$ is employed in the second line.
By construction, it is clear that the improved estimator~\eqref{eq:improved_estimator} is asymptotically unbiased.
If~$\psi_{\vect e} = \phi_{\vect e}$, then this estimator is unbiased also for finite~$T$.
% A direct corollary of~\eqref{eq:intermediate_equation} is that $v(T)$ is unbiased if $\psi_{\vect e} = \phi_{\vect e}$, even for finite $T$.
By a slight abuse of terminology,
we refer to the process $(\xi_t)_{t \geq 0}$ as the \emph{control variate} in the rest of this work.
% In the rest of this section,
% we often use yet another formula for $D^{\gamma}_{\vect e}$:
% \[
% D^{\gamma}_{\vect e} = \int_{0}^{\infty} \expect (\vect e^\t \vect p_s) .
% \]
% Next, we show a preliminary result on the bias of the estimator $v(T)$.
% Later, in \cref{lemma:bias_improved},
% we obtain a finer result showing that the bias scales as $T^{-1}$ in the limit as $T \to \infty$,
% with an explicit prefactor in terms of~$\gamma$.
% \begin{lemma}
% \label{lemma:asymptotic_bias}
% The estimator $v(T)$ is unbiased asymptotically as $T \to \infty$,
% that is
% \begin{equation}
% \label{eq:estimator_is_unbiased}
% \lim_{T \to \infty} \expect \bigl[v(T)\bigr]
% = D^{\gamma}_{\vect e}.
% \end{equation}
% \end{lemma}
% \begin{proof}
% In view of the definition~\eqref{eq:improved_estimator},
% it is sufficient to show that $u(T)$ is asymptotically unbiased.
% This was already motivated in~\eqref{eq:einsteins_formula} using a central limit theorem,
% but we include a short proof here for completeness.
% Repeating verbatim the reasoning of~\cref{lemma:explicit_limit} with~$\phi_{\vect e}$ instead of $\psi_{\vect e}$ and $\vect e^\t (\vect q_T - \vect q_0)$ instead of $\xi_T$,
% we obtain
% \begin{equation}
% \label{eq:intermediate_equation}
% \expect \bigl[u(T)\bigr]
% \xrightarrow[T \to \infty]{} d[\phi_{\vect e}],
% \end{equation}
% This concludes the proof since $d[\phi_{\vect e}]$ coincides with $D^{\gamma}_{\vect e}$ in~\eqref{eq:effective_diffusion_poisson} by~\eqref{eq:equivalent_definition_effective_diffusion}.
% \end{proof}
\begin{remark}
\label{remark:cost_control_variate}
Notice that calculating the control variate $\xi_T$ in~\eqref{eq:definition_control_variate} requires
evaluating $\psi_{\vect e}(\vect q_t, \vect p_t)$ at times 0 and $T$ and the gradient $\grad_{\vect p} \psi_{\vect e}(\vect q_t, \vect p_t)$ along the full trajectory $(\vect q_t, \vect p_t)_{0\leq t\leq T}$.
Therefore, it is important for efficiency that $\grad_{\vect p} \psi_{\vect e}$ is not computationally expensive to evaluate.
\end{remark}
In the next subsections,
we obtain non-asymptotic results on the bias of the estimator $v(T)$ in~\cref{sub:bias},
and bounds on its variance in~\cref{sub:variance}.
Before this,
in order to build intuition and motivate our results,
we scrutinize two settings where
explicit expressions of the bias and variance of the estimator $u(T)$ can be obtained:
constant potential and quadratic potential (for systems in $\real^d$ rather than $\torus^d$).
In the rest of this section,
we employ the notation $\e^{\mathcal L t}$ to denote the Markov semigroup corresponding to the stochastic dynamics~\eqref{eq:langevin}:
\[
\left(\e^{\mathcal L t} \varphi\right) (\vect q, \vect p) = \expect \bigl(\varphi(\vect q_t, \vect p_t) \big| (\vect q_0, \vect p_0) = (\vect q, \vect p)\bigr).
\]
% , or of the velocity auto-correlation function,
\begin{example}
[Constant potential]
\label{example:constant}
Consider the case where $V(q) = 0$ in dimension $d = 1$
(henceforth we drop the $\vect e$ subscript and the bold notation for $\vect q$ and $\vect p$).
In this case, the solution to the Poisson equation $- \mathcal L \phi = p$ is given by $\phi(q, p) = \gamma^{-1} p$,
and applying It\^o's formula to this function we obtain
(this also follows directly from a time integration of~\eqref{eq:langevin_p})
\[
\gamma^{-1}(p_t - p_0) = - \int_{0}^{t} p_s \, \d s + \sqrt{2 \gamma^{-1} \beta^{-1}} w_t
= q_0 - q_t + \sqrt{2 \gamma^{-1} \beta^{-1}} w_t.
\]
Using the explicit solution to the Ornstein--Uhlenbeck equation satisfied by $p$,
we deduce that
\begin{align*}
q_t - q_0
&= - \gamma^{-1} \left( p_0 \left(\e^{-\gamma t} - 1\right) + \sqrt{2 \gamma \beta^{-1}}\int_{0}^{t} \e^{-\gamma (t - s)} \, \d w_s \right)
+ \sqrt{2 \gamma^{-1} \beta^{-1}} w_t \\
&= - \gamma^{-1} p_0 \left(\e^{-\gamma t} - 1\right) + \sqrt{2 \gamma^{-1} \beta^{-1}}\int_{0}^{t} \left(1 - \e^{-\gamma (t - s)}\right) \, \d w_s.
\end{align*}
The assumptions on the initial condition imply that $p_0 \sim \mathcal N(0, \beta^{-1})$ and that $p_0$ is independent of $(w_t)_{t \geq 0}$,
so the right-hand side of this equation is a mean-zero Gaussian random variable.
Using It\^o's isometry, we calculate that $\expect \bigl[ u(T) \bigr]$ is given by
\begin{align*}
% \expect \bigl[ u(T) \bigr]
\frac{\expect \bigl[\abs{q_T - q_0}^2\bigr]}{2T}
&= \frac{\lvert \e^{-\gamma T} - 1 \rvert^2 + 2 \gamma T - 4 (1 - \e^{-\gamma T}) + 1 - \e^{-2 \gamma T}}{2 \gamma^2 \beta T} \\
&= \frac{1}{\gamma \beta} \left( 1 + \frac{1}{T \gamma} \left(\e^{-\gamma T} - 1\right) \right) =: \sigma_T^2.
\end{align*}
This equation implies that the effective diffusion coefficient in this example is $D^{\gamma} = \gamma^{-1} \beta^{-1}$,
and that the relative bias is bounded from above by $(T \gamma)^{-1}$.
% which is consistent with~\eqref{eq:bias} in the limit~$\gamma \to 0$,
% and even better than~\eqref{eq:refined_bound_for_u} in the limit $\gamma \to \infty$.
Furthermore,
since $\frac{\lvert q_T- q_0 \rvert^2}{2T\sigma_T^2}$ is distributed according to $\chi^2(1)$,
the variance of $u(T)$ is equal to
\[
\var \bigl[u(T)\bigr] = 2 \bigl(\expect [u(T)]\bigr)^2 = 2 \sigma_T^4 \xrightarrow[T \to \infty]{} 2 \lvert D^{\gamma} \rvert^2 .
\]
Note that this variance does not converge to 0 as $T \to \infty$,
a result further made precise for generic potentials in \cref{proposition:variance,proposition:asymptotic_variance}.
\end{example}
The case of a confining quadratic potential is degenerate,
in the sense that the associated effective diffusion coefficient is zero.
In this example,
we also obtain an explicit expression for the velocity autocorrelation function,
in order to motivate \cref{proposition:semigroup_meanzero_observable} below.
% Nevertheless, the bias and variance of the estimators $u(T)$ and $v(T)$ can still be studied in this case.
\begin{example}
[Quadratic potential]
\label{example:quadratic}
We now consider the case of the one-dimensional quadratic confining potential $V(q) = \frac{k q^2}{2}$,
and assume for simplicity $\gamma^2 - 4 k \neq 0$.
In this case, the eigenfunctions of $\mathcal L$ are polynomials
and, for every $n \geq 0$, the vector space $\poly(n)$ of polynomials of degree less than or equal to $n$
contains an orthonormal basis of eigenfunctions of $\mathcal L$~\cite[Section~6.3]{pavliotis2011applied}.
% This is not surprising, considering that $\mathcal L$ maps $\poly(d)$ into itself.
In particular, the constant function is an eigenfunction with eigenvalue 0,
and the two other eigenfunctions in~$\poly(1)$, together with their associated eigenvalues,
are given by
\begin{equation*}
g_{\pm}(q, p) =
- \lambda_{\mp} q + p,
\qquad
\lambda_{\pm} = \frac{- \gamma \pm \sqrt{\gamma^2 - 4k}}{2}.
\end{equation*}
Here the radical symbol $\sqrt{\cdot}$ denotes the principal square root;
for a complex number $z$, this is defined as $\sqrt{z} = \sqrt{r} \, \e^{\imag \theta/2}$ where $(r, \theta) \in [0, \infty) \times (-\pi, \pi]$ are the polar coordinates of~$z$.
The coordinate functions $(q, p) \mapsto q$ and $(q, p) \mapsto p$ are the following linear combinations of $g_+$ and~$g_-$:
\[
q = \frac{g_+(q,p) - g_-(q,p)}{\lambda_+ - \lambda_-},
\qquad
p = \frac{\lambda_+ g_+(q,p) - \lambda_- g_-(q,p)}{\lambda_+ - \lambda_-}.
\]
Therefore,
using the assumption that $(q_0, p_0) \sim \mu$,
we have
\begin{align*}
\expect \bigl[ \lvert q_T - q_0 \rvert^2 \bigr]
&= 2 \norm{q}^2 - 2 \ip{\e^{T \mathcal L}q}{q}
= 2 \norm{q}^2 - 2 \frac{\ip{\e^{\lambda_+ T} g_+ - \e^{\lambda_- T} g_-}{q}}{\lambda_+ - \lambda_-} \\
&= \frac{2}{k \beta} \left( 1 + \frac{\lambda_- \e^{\lambda_+ T} - \lambda_+ \e^{\lambda_- T}}{\lambda_+ - \lambda_-} \right).
\end{align*}
This implies that
\(
T \expect \bigl[ u(T) \bigr] \to (k \beta)^{-1}
\)
in the limit as~$T \to \infty$,
and so $D^{\gamma} = 0$ as expected.
Similarly, it is not difficult to show $T^2\var \bigl[ u(T) \bigr] \to 2 (k \beta)^{-2}$ in the same limit;
in this case, the variance is 0 asymptotically.
% and so the solution to the Poisson equation~\eqref{eq:poisson_equation} is given by
% \[
% \phi = \frac{g_+ - g_-}{\sqrt{\gamma^2 - 4k}} = q,
% \]
% which shows that $D^{\gamma} = 0$, as was expected.
% Given that $\ip{g_+}{p} = \ip{g_-}{p} = \beta^{-1}$,
Using that $\ip{g_+}{p} = \ip{g_-}{p} = \beta^{-1}$,
we can also calculate the velocity autocorrelation function:
\begin{equation}
\label{eq:velocity_autocorrelation_quadratic}
\ip{\e^{t \mathcal L}p}{p} =
\frac{\lambda_+ \e^{\lambda_+ t} - \lambda_- \e^{\lambda_- t}}{\beta(\lambda_+ - \lambda_-)}.
\end{equation}
In the limit as $\gamma \to \infty$,
it holds that $\lambda_+ \sim - k/\gamma$ and $\lambda_- \sim - \gamma$.
In this limit,
the factor multiplying the slowly decaying exponential $\e^{\lambda_+ t}$ in~\eqref{eq:velocity_autocorrelation_quadratic} scales as~$\mathcal O(\gamma^{-2})$,
whereas the factor multiplying the rapidly decaying exponential $\e^{\lambda_- t}$ scales as $\mathcal O(1)$.
We demonstrate in \cref{proposition:semigroup_meanzero_observable} that a similar property holds more generally.
\end{example}
\subsection{Bias of the estimators for the effective diffusion coefficient}%
\label{sub:bias}
In this subsection,
we begin by studying the bias of the standard estimator $u(T)$ in~\cref{ssub:bias_of_the_standard_estimator},
% which coincides with $v(T)$ when~$\psi_{\vect e} = 0$,
and then the bias of the improved estimator $v(T)$ in~\cref{ssub:bias_of_the_improved_estimator}.
Although we use, in \cref{sec:application_to_one_dimensional_langevin_type_dynamics,sec:applications_2d},
approximate solutions $\psi_{\vect e}$ of the Poisson equation~\eqref{eq:poisson_equation} that are not twice differentiable,
we focus in this section on the case where $\psi_{\vect e}$ is at least twice differentiable for simplicity of the analysis.
\subsubsection{Bias of the standard estimator}%
\label{ssub:bias_of_the_standard_estimator}
We first obtain in~\cref{lemma:easy_lemma} a simple bound on the bias based on standard results in the literature.
We then motivate, with the help of~\cref{example:quadratic},
that this result is not optimal in the overdamped regime and,
after obtaining a decay estimate for correlation functions of the form $t \mapsto \ip{\e^{t \mathcal L} f}{h}$ with $f$ and $h$ functions depending only on $p$,
we prove a finer bound on the bias in~\cref{corollary:better_bias}.
\begin{lemma}
[Preliminary bound on the bias of the standard estimator]
\label{lemma:easy_lemma}
There exists a positive constant~$C$ such that
\begin{equation}
\label{eq:bias_simple}
\forall \gamma \in (0, \infty),
\quad
\forall T > 0,
\qquad
\abs{\expect \bigl[u(T)\bigr] - D_{\vect e}^{\gamma}}
\leq
% \frac{L}{\beta T}\int_{0}^{\infty} \exp\bigl(- \ell \theta \min\{\gamma, \gamma^{-1}\}\bigr) \theta \, \d \theta =
\frac{C \max \{\gamma^{-2}, \gamma^2\}}{\beta T}.
\end{equation}
\end{lemma}
\begin{proof}
Since the initial conditions are assumed statistically stationary,
it holds that
\begin{align}
\notag
\expect \bigl[u(T)\bigr]
&= \frac{1}{2T} \expect \left[ \int_{0}^{T} \vect e^\t \vect p_{t} \, \d t \int_{0}^{T} \vect e^\t \vect p_s \, \d s \right]
= \frac{1}{2T} \int_{0}^{T} \!\! \int_{0}^{T} \expect \left[ \left(\vect e^\t \vect p_t\right) \left(\vect e^\t \vect p_s\right) \right] \, \d s \, \d t \\
\label{eq:first_equation_bias}
&= \frac{1}{T} \int_{0}^{T} \!\! \int_{0}^{t} \expect \left[ \left(\vect e^\t \vect p_t\right) \left(\vect e^\t \vect p_s\right) \right] \, \d s \, \d t,
\end{align}
since the contribution of the domain $0 \leq t \leq s \leq T$ is the same as that of $0 \leq s \leq t \leq T$.
The stationarity of the velocity process implies that, for~$t \geq s$,
\begin{align*}
\expect \left[ \left(\vect e^\t \vect p_t\right) \left(\vect e^\t \vect p_s\right) \right]
&= \expect \left[ \left(\vect e^\t \vect p_{t-s}\right) \left(\vect e^\t \vect p_0\right) \right]
% = \expect \left[ \expect \left[ \left(\vect e^\t \vect p_{t-s}\right) \left(\vect e^\t \vect p_0\right) \middle| (\vect q_0, \vect p_0) \right] \right] \\
% &= \expect \left[ \expect \left[ \left(\vect e^\t \vect p_{t-s}\right) \middle| (\vect q_0, \vect p_0) \right] \left(\vect e^\t \vect p_0\right)\right] \\
% &= \expect \left[ \Bigl( \e^{(t-s) \mathcal L} \left(\vect e^\t \vect p\right) \Bigr) (\vect q_0,\vect p_0) \left(\vect e^\t \vect p_0\right)\right]
= \ip{\e^{(t-s) \mathcal L} (\vect e^\t \vect p)}{\vect e^\t \vect p}.
\end{align*}
Substituting this expression in~\eqref{eq:first_equation_bias} and letting $\theta = t-s$ leads to
\begin{equation}
\label{eq:bias_without_control}
\begin{aligned}[b]
\expect \bigl[u(T)\bigr]
&= \int_{0}^{T} \ip{\e^{\theta \mathcal L} (\vect e^\t \vect p)}{\vect e^\t \vect p} \left(1 - \frac{\theta}{T}\right) \d \theta \\
&= \int_{0}^{\infty} \ip{\e^{\theta \mathcal L}(\vect e^\t \vect p)}{\vect e^\t \vect p} \d \theta
- \int_{0}^{\infty} \ip{\e^{\theta \mathcal L} (\vect e^\t \vect p)}{\vect e^\t \vect p} \min\left\{1, \frac{\theta}{T}\right\} \, \d \theta.
% D_{\phi} - \int_{0}^{\infty} \min\left(1, \frac{s}{t}\right) C_{\phi}(s) \, \d s.
\end{aligned}
\end{equation}
As we shall demonstrate, the second term tends to 0 in the limit as $T \to \infty$.
Therefore, since the estimator $u(T)$ is asymptotically unbiased,
the first term must coincide with the effective diffusion coefficient~$D^{\gamma}_{\vect e}$
-- this is in fact the well known Green--Kubo formula for the effective diffusion coefficient, see e.g.~\cite{pavliotis2011applied,MR3509213}.
The Green--Kubo formula can also be derived from~\eqref{eq:effective_diffusion_poisson} by using the representation formula $\phi_{\vect e} = \int_{0}^{\infty} \e^{\theta \mathcal L} \left(\vect e^\t \vect p\right) \, \d \theta$,
which is well defined in view of the exponential decay of $\e^{\theta \mathcal L}$ on $L^2_0(\mu)$,
see~\eqref{eq:decay_semigroup_general} below.
The second term in~\eqref{eq:bias_without_control} is the bias.
% As a first attempt towards bounding the latter term,
In order to bound this term,
we use a general bound for the Markov semigroup associated with Langevin dynamics
stating that
\begin{equation}
\label{eq:decay_semigroup_general}
\forall \gamma > 0, \qquad \forall \theta \geq 0, \qquad
\norm*{ \e^{\theta \mathcal L} }[\mathcal B \left(L^2_0\left(\mu\right) \right)] \leq L \exp \bigl(- \ell \theta \min\{\gamma, \gamma^{-1}\} \bigr)
\end{equation}
for appropriate constants $L > 0$ and $\ell > 0$.
Here $\mathcal B\left(L^2_0(\mu)\right)$ is the Banach space of bounded linear operators on $L^2_0(\mu)$,
and $\norm{\cdot}_{\mathcal B\left(L^2_0(\mu)\right)}$ is the usual associated norm.
This result is proved in~\cite{MR2394704} for $\gamma \in (0, 1)$ using the $H^1$ hypocoercivity approach~\cite{MR2562709},
and later in~\cite{MR3106879} for general $\gamma \in (0, \infty)$, in the Fokker--Planck setting,
using the direct $L^2(\mu)$ hypocoercivity approach pioneered in~\cite{Herau06,MR2576899,MR3324910}.
The latter approach is revisited in the backward Kolmogorov setting in~\cite{MR3522857,roussel2018spectral}.
An application of the bound~\eqref{eq:decay_semigroup_general} gives
\begin{equation}
\label{eq:initial_bound_velocity_autocorrelation}
\left\lvert \ip{\e^{\theta \mathcal L}(\vect e^\t \vect p)}{\vect e^\t \vect p} \right\rvert
\leq \norm{\e^{\theta \mathcal L}(\vect e^\t \vect p)} \norm{\vect e^\t \vect p}
\leq L \beta^{-1} \exp\bigl(- \ell \theta \min\{\gamma, \gamma^{-1}\}\bigr).
\end{equation}
Noting that
\begin{equation}
\label{eq:petit_calcul}
\forall \lambda > 0, \qquad
\int_{0}^{\infty} \e^{-\lambda \theta} \min\left\{ 1, \frac{\theta}{T} \right\} \, \d \theta
% \leq \frac{1}{T} \int_{0}^{\infty} \theta \e^{-\lambda \theta} \, \d \theta
= \frac{1 - \e^{- \lambda T}}{\lambda^2 T} \leq \frac{1}{\lambda^2 T},
\end{equation}
we obtain~\eqref{eq:bias_simple}.
\end{proof}
Since the effective diffusion coefficient scales as $\gamma^{-1}$ in both the underdamped ($\gamma \to 0$) and overdamped limits ($\gamma \to \infty$)~\cite{MR2394704,MR2427108},
this estimate~\eqref{eq:bias_simple} suggests that the relative bias of the estimator scales as $\max\{\gamma^{-1}, \gamma^3\} T^{-1}$ and that,
consequently, the integration time $T$ should scale proportionally to $\max\{\gamma^{-1}, \gamma^3\}$ in order to achieve a given relative accuracy.
It turns out that the estimate~\eqref{eq:initial_bound_velocity_autocorrelation} is not optimal in the overdamped regime,
which is clear in the case of a quadratic potential; see~\eqref{eq:velocity_autocorrelation_quadratic} in \cref{example:quadratic}.
We derive a sharper estimate from the following~\cref{proposition:semigroup_meanzero_observable}.
In order to state this result,
we introduce the operators~$\Pi_{\vect p}\colon \lp{2}{\mu} \rightarrow \lp{2}{\mu}$ and $\Pi_{\vect p}^\perp = \id - \Pi_{\vect p}$,
with
\[
\Pi_{\vect p} u (\vect q) = \int u(\vect q, \vect p) \, \kappa(\d \vect p).
\]
The operators $\Pi_{\vect p}$ and $\Pi_{\vect p}^\perp$ are respectively the $L^2(\mu)$ projection operators onto
the subspace of functions depending only on $\vect q$,
and the subspace of functions with average $0$ in $\vect p$ (with respect to the marginal distribution $\kappa$, defined in~\eqref{eq:definition_prob_measures}, and for almost every $\vect q \in \torus^d$).
We also introduce the space $H^{1,\vect q}(\mu)$ of functions in $L^2(\mu)$ with their $\vect q$-gradient also in $L^2(\mu)$,
and the associated norm $\norm{\dummy}[1,\vect q] = \norm{\dummy} + \norm{\grad_{\vect q} \dummy}$.
\begin{proposition}
\label{proposition:semigroup_meanzero_observable}
Assume that $f \in H^{1,\vect q}(\mu)$ and $h \in H^{1,\vect q}(\mu)$ are smooth functions in~$\Pi_{\vect p}^\perp \lp{2}{\mu}$.
Then there exist positive constants $A$ and $a$, independent of $f$ and $h$, such that
\begin{equation}
\label{eq:optimal_decay_correlation}
\forall \gamma \geq 1, \quad
\forall t \geq 0, \qquad
\abs{\ip{\e^{t \mathcal L}f}{h}}
\leq A \norm{f}[1,\vect q] \norm{h}[1,\vect q] \left( \gamma^{-2} \e^{- a \gamma^{-1} t} + \e^{- a \gamma t} \right).
\end{equation}
\end{proposition}
This result, proved in \cref{sec:auxiliary_technical_results},
enables us to show the following bound on the bias of~$u(T)$,
which is better than~\cref{lemma:easy_lemma} in the large $\gamma$ regime.
Roughly speaking, \cref{proposition:semigroup_meanzero_observable} states that,
when $\gamma \gg 1$ and $f$ and $h$ are mean-zero in $\vect p$,
correlations of the form $\expect \bigl[ f(\vect q_t, \vect p_t) h(\vect q_0, \vect p_0) \bigr]$ are~$\mathcal O(\gamma^{-2})$ small after a small time of order $\mathcal O(\gamma^{-1} \log \gamma)$,
despite the fact that their asymptotic decay as~$\e^{- a \gamma^{-1} t}$ is slow.
% In other words, information on the starting $p$ is quickly forgotten.
\begin{corollary}
[Bias of the standard estimator]
\label{corollary:better_bias}
There exists a positive constant~$\widehat C$ such that
\begin{equation}
\label{eq:bias}
\forall \gamma \in (0, \infty),
\quad
\forall T > 0,
\qquad
\abs{\expect \bigl[u(T)\bigr] - D_{\vect e}^{\gamma}}
\leq
% \frac{L}{\beta T}\int_{0}^{\infty} \exp\bigl(- \ell \theta \min\{\gamma, \gamma^{-1}\}\bigr) \theta \, \d \theta =
\frac{\widehat C \max \{\gamma^{-2}, 1\}}{\beta T}.
\end{equation}
\end{corollary}
\begin{proof}
Applying \cref{proposition:semigroup_meanzero_observable} with $f(\vect q, \vect p) = h(\vect q, \vect p) = \vect e^\t \vect p$,
and recalling that the bias coincides with the second term on the right-hand side of~\eqref{eq:bias_without_control},
we obtain
\begin{align}
\label{eq:refined_bound_for_u}
\abs{\expect \bigl[u(T)\bigr] - D^{\gamma}_{\vect e}}
&\leq \frac{A}{\beta} \int_{0}^{\infty} \left( \gamma^{-2} \e^{- a \gamma^{-1} \theta} + \e^{-a \gamma \theta} \right) \min \left\{1, \frac{\theta}{T} \right\} \, \d \theta \\
&\leq \frac{A}{\beta T} \int_{0}^{\infty} \left( \gamma^{-2} \e^{- a \gamma^{-1} \theta} + \e^{-a \gamma \theta} \right) \theta \, \d \theta
\leq \frac{A}{\beta a^2 T} \left(1 + \frac{1}{\gamma^2} \right), \notag
\end{align}
which directly yields the result.
% Combining this inequality with \cref{lemma:easy_lemma},
% we obtain the result.
% Despite this improvement,
% the computational cost of calculating the effective diffusion coefficient from Monte Carlo simulation in the overdamped regime is prohibitive,
% because the time step must scale as $\gamma^{-1}$ in order to accurately integrate the Langevin dynamics,
% leading to a computational cost in the overdamped limit scaling as $\gamma^2$.
\end{proof}
The estimate~\eqref{eq:bias} shows that the relative bias in fact scales as~$\max\{\gamma^{-1}, \gamma\} T^{-1}$,
and so it is sufficient to take~$T \propto \gamma$ in order to control the bias in the overdamped limit.
\begin{remark}
The case where~$V(\vect q) = 0$ is particular,
in that the correlation $\ip{\e^{\theta \mathcal L} (\vect e^\t \vect p)}{\vect e^\t \vect p}$ decays as $\e^{-\gamma \theta}$ with a prefactor independent of $\gamma$ in this setting.
Consequently, the bias of~$u(T)$ scales as $(\gamma T)^{-1}$ in both the underdamped and the overdamped regimes,
as observed in~\cref{example:constant}.
\end{remark}
\subsubsection{Bias of the improved estimator}%
\label{ssub:bias_of_the_improved_estimator}
We now obtain a bound on the bias of the improved estimator $v(T)$.
The following result can be viewed as a generalization of~\cref{lemma:easy_lemma},
which is recovered in the particular case when $\psi_{\vect e} = 0$.
\begin{proposition}
[Bias of the estimator]
\label{lemma:bias_improved}
Assume that $\mathcal L \psi_{\vect e} \in \lp{2}{\mu}$.
% \begin{itemize}
% \item \textbf{General bound for $\gamma \in (0, \infty)$:}
With the same notation as in~\eqref{eq:decay_semigroup_general},
it holds that
\begin{align}
\label{eq:basic_bound_bias}
\forall \gamma \in (0, \infty), \qquad
\abs{\expect \bigl[ v(T) \bigr] - D^{\gamma}_{\vect e}}
&\leq \frac{L \max\{\gamma^2, \gamma^{-2}\}}{T \ell^2 } \, \norm{\vect e^\t \vect p + \mathcal L \psi_{\vect e}} \left(\beta^{-1/2} + \norm{\mathcal L \psi_{\vect e}} \right).
\end{align}
\end{proposition}
Note that the right-hand side of~\eqref{eq:basic_bound_bias} is small when $\mathcal L \psi_{\vect e} \approx \mathcal L \phi_{\vect e} = - \vect e^\t \vect p$.
\begin{proof}
Using Itô's formula for $\psi_{\vect e}$,
we have
\[
\psi_{\vect e}(\vect q_T, \vect p_T) - \psi_{\vect e}(\vect q_0, \vect p_0)
= \int_{0}^{T} (\mathcal L \psi_{\vect e}) (\vect q_t, \vect p_t) \, \d t
+ \sqrt{2 \gamma \beta^{-1}} \int_{0}^{T} \grad_{\vect p} \psi_{\vect e} (\vect q_t, \vect p_t) \cdot \d \vect w_t,
\]
and employing the same reasoning as in~\eqref{eq:bias_without_control}, we obtain
\begin{align*}
\expect \bigl[v(T)\bigr]
&= d[\psi_{\vect e}] + \frac{1}{2T} \, \expect \biggl[ \bigl\lvert \vect e^\t (\vect q_T - \vect q_0) \bigr\rvert^2 - \biggl| \int_0^T {\mathcal L \psi_{\vect e}}(\vect q_t, \vect p_t) \, \d t \biggr|^2 \biggr] \\
&= d[\psi_{\vect e}] + \int_{0}^{T} \Bigl( \ip{\e^{\theta \mathcal L}\bigl(\vect e^\t \vect p\bigr)}{\vect e^\t \vect p} - \ip{\e^{\theta \mathcal L} \mathcal L \psi_{\vect e}}{\mathcal L \psi_{\vect e}} \Bigr) \left( 1 - \frac{\theta}{T} \right) \d \theta \\
&= D^{\gamma}_{\vect e} - \int_{0}^{\infty} \min\left\{1, \frac{\theta}{T}\right\} \Bigl( \ip{\e^{\theta \mathcal L}\bigl(\vect e^\t \vect p\bigr)}{\vect e^\t \vect p} - \ip{\e^{\theta \mathcal L} \mathcal L \psi_{\vect e}}{\mathcal L \psi_{\vect e}} \Bigr) \, \d \theta.
\end{align*}
We denote the $L^2(\mu)$ adjoint of the generator $\mathcal L$ by~
\(
\mathcal L^* = - \mathcal L_{\rm Ham} + \gamma \mathcal L_{\rm FD}
\).
We have
\begin{align*}
\left\lvert \ip{\e^{t \mathcal L}(\vect e^\t \vect p)}{\vect e^\t \vect p} - \ip{\e^{t \mathcal L} \mathcal L \psi_{\vect e}}{\mathcal L \psi_{\vect e}} \right\rvert
&= \abs{\ip{\e^{t \mathcal L} (\vect e^\t \vect p)}{\vect e^\t \vect p + \mathcal L \psi_{\vect e}}- \ip{\e^{t \mathcal L} \left(\vect e^\t \vect p + \mathcal L \psi_{\vect e}\right)}{\mathcal L\psi_{\vect e}}} \\
&\qquad \leq \norm{\e^{t \mathcal L}}[\mathcal B\left(L^2_0(\mu) \right)] \norm*{\vect e^\t \vect p + \mathcal L \psi_{\vect e}}
\left(\norm*{\vect e^\t \vect p} + \norm{\mathcal L \psi_{\vect e}} \right) \\
&\qquad \leq L \e^{- \ell \min\{\gamma, \gamma^{-1}\} t} \norm*{\vect e^\t \vect p + \mathcal L\psi_{\vect e}} \left(\beta^{-1/2} + \norm{\mathcal L \psi_{\vect e}} \right),
\end{align*}
where $L$ and $\ell$ are the same constants as in~\eqref{eq:decay_semigroup_general}.
We finally obtain~\eqref{eq:basic_bound_bias} in view of~\eqref{eq:petit_calcul}.
\end{proof}
\Cref{lemma:bias_improved} suffers from the same shortcoming as~\cref{lemma:easy_lemma}:
it is not optimal in the large $\gamma$ regime.
Employing~\cref{proposition:semigroup_meanzero_observable} in a similar manner as in the proof of~\cref{corollary:better_bias},
we prove in~\cref{sec:proof_technical_result} that,
if $\mathcal L \psi_{\vect e} \in H^{1,\vect q}(\mu)$,
then there is $C$ independent of $\psi_{\vect e}$ such that
\begin{align}
\notag
\forall \gamma \geq 1, \quad
\forall T > 0, \qquad
\abs{\expect \bigl[ v(T) \bigr] - D_{\vect e}^{\gamma}}
&\leq C T^{-1}
\norm*{\vect e^\t \vect p + \mathcal L \psi_{\vect e}}[1,\vect q] \, \bigl(\beta^{-1/2} + \norm{\mathcal L \psi_{\vect e}}[1,\vect q] \bigr) \\
\label{eq:refined_bound}
&\quad + C T^{-1} \gamma^2 \norm{\Pi_{\vect p} \mathcal L \psi_{\vect e}} \norm{\mathcal L \psi_{\vect e}}.
\end{align}
This bound is not as satisfying as~\cref{corollary:better_bias},
because a $\gamma^2$ factor remains in the second term on the right-hand side,
although the prefactor $\norm{\Pi_{\vect p} \mathcal L \psi_{\vect e}}$ is expected to be small as~$\norm{\Pi_{\vect p} \mathcal L \psi_{\vect e}} = \norm{\Pi_{\vect p} (\vect e^\t \vect p + \mathcal L \psi_{\vect e})} \leq \norm{\vect e^\t \vect p + \mathcal L \psi_{\vect e}}$.
The bound~\eqref{eq:refined_bound} is therefore an improvement over~\eqref{eq:basic_bound_bias} for large $\gamma$.
However,
unless $\Pi_{\vect p} \mathcal L \psi_{\vect e} = \Pi_{\vect p} \mathcal L_{\rm Ham} \psi_{\vect e} = \mathcal O(\gamma^{-2})$,
the dependence on $\gamma$ of the bias in~\eqref{eq:refined_bound} is worse in the limit $\gamma \to \infty$
than that of the simple estimator $u(T)$, see~\eqref{eq:bias}.
It is then not clear that employing a control variate is useful in this limit.
Since our focus in this work is on the underdamped limit $\gamma \to 0$,
and since the overdamped limit $\gamma \to \infty$ for one- or two-dimensional systems is more easily studied numerically through deterministic methods anyway,
we do not further investigate this issue.
% \begin{remark}
% In one spatial dimension, it is possible to modify a control variate $\psi$ in such a way that $\Pi_{\vect p} \mathcal L \psi = 0$.
% Let $\alpha^{\psi}_1(q) = \int \psi(q,p) \, \sqrt{\beta} p \, \d \kappa(p)$ and define
% \[
% \widetilde \psi = \psi(q,p) - \sqrt{\beta} p \left( \alpha_1^{\psi}(q) - k \e^{\beta V(q)} \right),
% \qquad k = \frac{\int \alpha_1^{\psi}\!(q) \, \e^{\beta V(q)} \, \d \nu(q)}{\int \e^{2 \beta V(q)} \, \d \nu(q)}.
% \]
% Note that $\widetilde \psi$ is the $L^2(\mu)$ orthogonal projection of $\psi$ on the subspace of functions of the form
% \[
% f = \alpha_0(q) + \alpha_1 \, \e^{\beta V(q)} \, \sqrt{\beta} p + \alpha_2(q) \, H_2(p) + \alpha_3(q) H_3(p), \dotsc
% \]
% where $(H_i)_{i \geq 0}$ are appropriately rescaled Hermite polynomials.
% The exact solution $\phi$ is of this form since $\Pi_{\vect p} \mathcal L = \Pi_p \mathcal L_{\rm Ham} = \Pi_p \partial_p \partial_q^*$,
% and the kernel of $\partial_q^*$ is spanned by $\e^{\beta V(q)}$.
% We can bound
% \begin{align*}
% \norm*{\widetilde \psi - \phi}[3]
% &\leq \norm{\psi - \phi}[3] + \norm{\widetilde \psi - \psi}[3] \\
% & \norm{\psi - \phi}[3] \lvert \alpha_1^{\phi} - k \rvert \norm*{\e^{\beta V(q)}} \\
% &= \norm{\psi - \phi} + \lvert \alpha_1^{\phi} - k \rvert \norm*{\e^{\beta V(q)}}.
% \end{align*}
% \begin{align*}
% \Pi_p \partial_p \partial_q^* \widetilde \psi
% &= \int \partial_p \partial_q^* \psi(q, \widetilde p) \, \d \kappa(\widetilde p)
% - \left(\partial_q^*\psi_1(q) + k \left( \partial_q^* \e^{\beta V(q)} \right) \right)
% = \partial_q^*\psi_1(q) - \partial_q^*\psi_1(q) - 0 = 0,
% \end{align*}
% and it is simple to check that our choice of $k$ guarantees that $\widetilde \psi = \psi$
% if $\psi = \phi$ is the exact solution to the Poisson equation~\eqref{eq:poisson_equation}.
% Notice that
% \[
% \widetilde \psi_{\vect e} - \phi_{\vect e} = \