Skip to content

Commit

Permalink
Merge pull request #1036 from Mrmaxmeier/reproducibility-stub-abspath…
Browse files Browse the repository at this point in the history
…-mtime

Introduce `-Z deterministic-mode` to address a few reproducibility issues
  • Loading branch information
pkgw authored Jun 11, 2023
2 parents 6b6df36 + c2e2ef1 commit a3389b9
Show file tree
Hide file tree
Showing 11 changed files with 146 additions and 26 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ tempfile = "^3.1"

[package.metadata.vcpkg]
git = "https://github.com/microsoft/vcpkg"
rev = "ea222747b888b8d63df56240b262db38b095c68f"
rev = "63366443439398a62afc9a63b34b9a3ba63b1cae"
overlay-triplets-path = "dist/vcpkg-triplets"

# If other targets start using custom triplets like x86_64-pc-windows-msvc,
Expand Down
62 changes: 61 additions & 1 deletion crates/bridge_core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ pub struct CoreBridgeLauncher<'a> {
hooks: &'a mut dyn DriverHooks,
status: &'a mut dyn StatusBackend,
security: SecuritySettings,
filesystem_emulation_settings: FsEmulationSettings,
}

impl<'a> CoreBridgeLauncher<'a> {
Expand All @@ -242,8 +243,25 @@ impl<'a> CoreBridgeLauncher<'a> {
hooks,
status,
security,
filesystem_emulation_settings: FsEmulationSettings::default(),
}
}

/// Control whether the engine is allowed to learn the absolute path of
/// its most recent input file.
///
/// Absolute paths are what SyncTeX and external tools rely on to resolve
/// TeX sources, so they are exposed by default. Pass `false` to hide them
/// when a reproducible build matters more.
pub fn with_expose_absolute_paths(&mut self, expose_absolute_paths: bool) -> &mut Self {
    let settings = &mut self.filesystem_emulation_settings;
    settings.expose_absolute_paths = expose_absolute_paths;
    self
}

/// Substitute a fixed value for the modification time of input files.
///
/// With `Some(timestamp)`, modification-time queries return that value
/// instead of what the IO subsystem reports; deterministic mode uses this
/// to hand in the configured build time (i.e. `SOURCE_DATE_EPOCH`). With
/// `None`, the timestamps reported by the IO subsystem are used.
pub fn with_mtime_override(&mut self, mtime_override: Option<i64>) -> &mut Self {
    let settings = &mut self.filesystem_emulation_settings;
    settings.mtime_override = mtime_override;
    self
}

/// Invoke a function to launch a bridged FFI engine with a global mutex
/// held.
///
Expand All @@ -262,7 +280,12 @@ impl<'a> CoreBridgeLauncher<'a> {
F: FnOnce(&mut CoreBridgeState<'_>) -> Result<T>,
{
let _guard = ENGINE_LOCK.lock().unwrap();
let mut state = CoreBridgeState::new(self.security.clone(), self.hooks, self.status);
let mut state = CoreBridgeState::new(
self.security.clone(),
self.hooks,
self.status,
self.filesystem_emulation_settings.clone(),
);
let result = callback(&mut state);

if let Err(ref e) = result {
Expand All @@ -285,6 +308,9 @@ pub struct CoreBridgeState<'a> {
/// The security settings for this invocation
security: SecuritySettings,

/// The filesystem emulation settings for this invocation.
fs_emulation_settings: FsEmulationSettings,

/// The driver hooks associated with this engine invocation.
hooks: &'a mut dyn DriverHooks,

Expand Down Expand Up @@ -312,6 +338,7 @@ impl<'a> CoreBridgeState<'a> {
security: SecuritySettings,
hooks: &'a mut dyn DriverHooks,
status: &'a mut dyn StatusBackend,
fs_emulation_settings: FsEmulationSettings,
) -> CoreBridgeState<'a> {
CoreBridgeState {
security,
Expand All @@ -320,6 +347,7 @@ impl<'a> CoreBridgeState<'a> {
output_handles: Vec::new(),
input_handles: Vec::new(),
latest_input_path: None,
fs_emulation_settings,
}
}

Expand Down Expand Up @@ -592,6 +620,9 @@ impl<'a> CoreBridgeState<'a> {
}

fn input_get_mtime(&mut self, handle: *mut InputHandle) -> i64 {
if let Some(mtime) = self.fs_emulation_settings.mtime_override {
return mtime;
}
let rhandle: &mut InputHandle = unsafe { &mut *handle };

let maybe_time = match rhandle.get_unix_mtime() {
Expand Down Expand Up @@ -773,6 +804,32 @@ impl Default for SecuritySettings {
}
}

/// Configuration knobs for the emulated filesystem presented to the engine.
///
/// Nothing here is security-critical; these options exist for the sake of
/// reproducible document builds. The default is an "accurate" view of the
/// underlying IO subsystem, and each option replaces the result of one IO
/// function with a fake / stable value.
#[derive(Clone, Debug)]
struct FsEmulationSettings {
    /// Whether absolute paths of input files are revealed. They are useful
    /// for SyncTeX and for external tools that resolve paths to TeX
    /// sources, but can be turned off for reproducibility.
    expose_absolute_paths: bool,

    /// When set, the value reported as the modification time of input
    /// files. Deterministic mode stores the configured build time (i.e.
    /// `SOURCE_DATE_EPOCH`) here so that the timestamp reported by the IO
    /// subsystem is not used.
    mtime_override: Option<i64>,
}

impl Default for FsEmulationSettings {
    /// The "accurate" configuration: absolute paths are exposed and no
    /// modification time is overridden.
    fn default() -> Self {
        FsEmulationSettings {
            mtime_override: None,
            expose_absolute_paths: true,
        }
    }
}

// The entry points.

/// Issue a warning.
Expand Down Expand Up @@ -968,6 +1025,9 @@ pub unsafe extern "C" fn ttbc_get_last_input_abspath(
buffer: *mut u8,
len: libc::size_t,
) -> libc::ssize_t {
if !es.fs_emulation_settings.expose_absolute_paths {
return 0;
}
match es.latest_input_path {
None => 0,

Expand Down
2 changes: 1 addition & 1 deletion crates/xetex_layout/layout/xetex-XeTeXFontInst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ XeTeXFontInst::initialize(const char* pathname, int index, int &status)

error = FT_New_Memory_Face(gFreeTypeLibrary, m_backingData, sz, index, &m_ftFace);

if (!FT_IS_SCALABLE(m_ftFace)) {
if (error || !FT_IS_SCALABLE(m_ftFace)) {
status = 1;
return;
}
Expand Down
7 changes: 4 additions & 3 deletions docs/src/ref/v1cli.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,17 @@ The following are the available flags.
| `-c` | `--chatter <LEVEL>` | How much chatter to print when running [default: default] [possible values: default, minimal] |
| | `--format <PATH>` | The name of the "format" file used to initialize the TeX engine [default: latex] |
| `-h` | `--help` | Prints help information |
| | `--hide <PATH>...` | Tell the engine that no file at `<PATH>` exists, if it tries to read it |
| | `--hide <PATH>...` | Tell the engine that no file at `<PATH>` exists, if it tries to read it |
| `-k` | `--keep-intermediates` | Keep the intermediate files generated during processing |
| | `--keep-logs` | Keep the log files generated during processing |
| | `--makefile-rules <PATH>` | Write Makefile-format rules expressing the dependencies of this run to <PATH> |
| | `--keep-logs` | Keep the log files generated during processing |
| | `--makefile-rules <PATH>` | Write Makefile-format rules expressing the dependencies of this run to `<PATH>` |
| `-C` | `--only-cached` | Use only resource files cached locally |
| `-o` | `--outdir <OUTDIR>` | The directory in which to place output files [default: the directory containing INPUT] |
| | `--outfmt <FORMAT>` | The kind of output to generate [default: pdf] [possible values: pdf, html, xdv, aux, format] |
| | `--pass <PASS>` | Which engines to run [default: default] [possible values: default, tex, bibtex_first] |
| `-p` | `--print` | Print the engine's chatter during processing |
| `-r` | `--reruns <COUNT>` | Rerun the TeX engine exactly this many times after the first |
| | `--synctex` | Generate SyncTeX data |
| | `--untrusted` | Input is untrusted: disable all known-insecure features |
| `-V` | `--version` | Prints version information |
| `-w` | `--web-bundle <URL>` | Use this URL to find resource files instead of the default |
2 changes: 1 addition & 1 deletion docs/src/v2cli/build.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ cause it to look for that file in the online support bundle.
The `--print` option (or `-p` for short) will cause the engine to print the
regular terminal output of the TeX engine. This output is similar to, but not
identical to, the contents of the log file. By default, this output is only
printed if the engine encounteres a fatal error.
printed if the engine encounters a fatal error.

The `--open` option will open the built document using the system handler.

Expand Down
1 change: 1 addition & 0 deletions docs/src/v2cli/compile.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,4 @@ the set of unstable options is subject to change at any time.
| `-Z search-path=<path>` | Also look in `<path>` for files (unless `--untrusted` has been specified), like TEXINPUTS. Can be specified multiple times. |
| `-Z shell-escape` | Enable `\write18` (unless `--untrusted` has been specified) |
| `-Z shell-escape-cwd=<path>` | Working directory to use for `\write18`. Use `$(pwd)` for same behaviour as most other engines (e.g. for relative paths in `\inputminted`). Implies `-Z shell-escape` |
| `-Z deterministic-mode` | Force a deterministic build environment. Note that setting `SOURCE_DATE_EPOCH` is usually sufficient for reproducible builds, and this option makes some extra functionality trade-offs. Specifically, deterministic mode breaks SyncTeX's auxiliary files as they include and rely on absolute file paths |
6 changes: 3 additions & 3 deletions docs/src/v2cli/dump.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ one of its parents.
[tectonic-toml]: ../ref/tectonic-toml.md

The “partial build” consists of one pass of the TeX engine. Future versions of
this tool might gain options allowing you specify different passes. This command
can be used to dump any file created by TeX during the build (so long as it's
created on the first pass).
this tool might gain options allowing you to specify different passes. This
command can be used to dump any file created by TeX during the build (so long
as it's created on the first pass).

#### Command-Line Options

Expand Down
16 changes: 2 additions & 14 deletions src/bin/tectonic/compile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@
//! `compile` subcommand of the "V2" / "cargo-like" interface.
use std::{
env,
path::{Path, PathBuf},
str::FromStr,
time,
};
use structopt::StructOpt;
use tectonic_bridge_core::{SecuritySettings, SecurityStance};
Expand Down Expand Up @@ -113,6 +111,7 @@ impl CompileOptions {
let mut sess_builder =
ProcessingSessionBuilder::new_with_security(SecuritySettings::new(stance));
let format_path = self.format;
let deterministic_mode = unstable.deterministic_mode;
sess_builder
.unstables(unstable)
.format_name(&format_path)
Expand Down Expand Up @@ -199,18 +198,7 @@ impl CompileOptions {
} else {
sess_builder.bundle(config.default_bundle(only_cached, status)?);
}

let build_date_str = env::var("SOURCE_DATE_EPOCH").ok();
let build_date = match build_date_str {
Some(s) => {
let epoch = s.parse::<u64>().expect("invalid build date (not a number)");
time::SystemTime::UNIX_EPOCH
.checked_add(time::Duration::from_secs(epoch))
.expect("time overflow")
}
None => time::SystemTime::now(),
};
sess_builder.build_date(build_date);
sess_builder.build_date_from_env(deterministic_mode);
run_and_report(sess_builder, status).map(|_| 0)
}
}
Expand Down
17 changes: 16 additions & 1 deletion src/docmodel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ use crate::{
errors::{ErrorKind, Result},
status::StatusBackend,
test_util, tt_note,
unstable_opts::UnstableOptions,
};

/// Options for setting up [`Document`] instances with the driver
Expand All @@ -40,6 +41,9 @@ pub struct DocumentSetupOptions {

/// Security settings for engine features.
security: SecuritySettings,

/// Ensure a deterministic build environment.
deterministic_mode: bool,
}

impl DocumentSetupOptions {
Expand All @@ -48,6 +52,7 @@ impl DocumentSetupOptions {
pub fn new_with_security(security: SecuritySettings) -> Self {
DocumentSetupOptions {
only_cached: false,
deterministic_mode: false,
security,
}
}
Expand All @@ -61,6 +66,12 @@ impl DocumentSetupOptions {
self.only_cached = s;
self
}

/// Choose whether the document should be built in a deterministic
/// environment.
///
/// Returns `self` so that further options can be chained.
pub fn deterministic_mode(&mut self, s: bool) -> &mut Self {
    let enabled = s;
    self.deterministic_mode = enabled;
    self
}
}

pub trait DocumentExt {
Expand Down Expand Up @@ -157,7 +168,11 @@ impl DocumentExt for Document {
sess_builder
.output_format(output_format)
.format_name(&profile.tex_format)
.build_date(std::time::SystemTime::now())
.build_date_from_env(setup_options.deterministic_mode)
.unstables(UnstableOptions {
deterministic_mode: setup_options.deterministic_mode,
..Default::default()
})
.pass(PassSetting::Default)
.primary_input_buffer(input_buffer.as_bytes())
.tex_input_name(output_profile);
Expand Down
36 changes: 35 additions & 1 deletion src/driver.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use std::{
rc::Rc,
result::Result as StdResult,
str::FromStr,
time::SystemTime,
time::{Duration, SystemTime},
};
use tectonic_bridge_core::{CoreBridgeLauncher, DriverHooks, SecuritySettings, SystemRequestError};
use tectonic_bundles::Bundle;
Expand Down Expand Up @@ -985,6 +985,28 @@ impl ProcessingSessionBuilder {
self
}

/// Derives the session's build date from the process environment.
///
/// The sources are consulted in order of precedence:
///
/// 1. `SOURCE_DATE_EPOCH`, when set, is read as a number of seconds since
///    the Unix epoch. This wins regardless of `force_deterministic`.
/// 2. Otherwise, if `force_deterministic` is true, the Unix epoch itself
///    is used.
/// 3. Otherwise, the current system time is used.
///
/// # Panics
///
/// Panics if `SOURCE_DATE_EPOCH` is set but does not parse as a `u64`, or
/// if adding that many seconds to the Unix epoch overflows `SystemTime`.
pub fn build_date_from_env(&mut self, force_deterministic: bool) -> &mut Self {
    let timestamp = if let Ok(raw) = std::env::var("SOURCE_DATE_EPOCH") {
        let seconds: u64 = raw
            .parse()
            .expect("invalid SOURCE_DATE_EPOCH (not a number)");
        let offset = Duration::from_secs(seconds);

        SystemTime::UNIX_EPOCH
            .checked_add(offset)
            .expect("time overflow")
    } else if force_deterministic {
        // No explicit date was requested, but the output must not depend
        // on the wall clock.
        SystemTime::UNIX_EPOCH
    } else {
        SystemTime::now()
    };

    self.build_date(timestamp)
}

/// Loads unstable options into the processing session
pub fn unstables(&mut self, opts: UnstableOptions) -> &mut Self {
self.unstables = opts;
Expand Down Expand Up @@ -1830,6 +1852,18 @@ impl ProcessingSession {
let mut launcher =
CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());

// In deterministic mode, we stub a few aspects of the environment.
// They default to a "realistic" view, but we override them with static values:
if self.unstables.deterministic_mode {
launcher.with_expose_absolute_paths(false);
launcher.with_mtime_override(Some(
self.build_date
.duration_since(SystemTime::UNIX_EPOCH)
.map(|x| x.as_secs() as i64)
.expect("invalid build date in deterministic mode"),
));
}

TexEngine::default()
.halt_on_error_mode(!self.unstables.continue_on_errors)
.initex_mode(self.output_format == OutputFormat::Format)
Expand Down
21 changes: 21 additions & 0 deletions src/unstable_opts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ const HELPMSG: &str = r#"Available unstable options:
-Z shell-escape-cwd Working directory to use for \write18. Use $(pwd) for same behaviour as
most other engines (e.g. for relative paths in \inputminted).
Implies -Z shell-escape
-Z deterministic-mode Force a deterministic build environment. Note that setting
`SOURCE_DATE_EPOCH` is usually sufficient for reproducible builds,
and this option makes some extra functionality trade-offs.
Specifically, deterministic mode breaks SyncTeX's auxiliary files
as they include and rely on absolute file paths
"#;

// Each entry of this should correspond to a field of UnstableOptions.
Expand All @@ -41,6 +46,7 @@ pub enum UnstableArg {
SearchPath(PathBuf),
ShellEscapeEnabled,
ShellEscapeCwd(String),
DeterministicModeEnabled,
}

impl FromStr for UnstableArg {
Expand Down Expand Up @@ -97,6 +103,8 @@ impl FromStr for UnstableArg {
require_value("path").map(|s| UnstableArg::ShellEscapeCwd(s.to_string()))
}

"deterministic-mode" => require_no_value(value, UnstableArg::DeterministicModeEnabled),

_ => Err(format!("Unknown unstable option '{arg}'").into()),
}
}
Expand All @@ -110,6 +118,18 @@ pub struct UnstableOptions {
pub min_crossrefs: Option<i32>,
pub extra_search_paths: Vec<PathBuf>,
pub shell_escape_cwd: Option<String>,

/// Ensure a deterministic build environment.
///
/// The most significant user-facing difference is a static document build
/// date, but this is already covered by [`crate::driver::ProcessingSessionBuilder::build_date_from_env`],
/// which accepts a `force_deterministic` flag. Additionally, deterministic mode
/// spoofs file modification times and hides absolute paths from the engine.
///
/// There are a few ways to break determinism (shell escape, reading from
/// `/dev/urandom`), but anything else (especially behaviour in TeXLive
/// packages) is considered a bug.
pub deterministic_mode: bool,
}

impl UnstableOptions {
Expand All @@ -132,6 +152,7 @@ impl UnstableOptions {
opts.shell_escape_cwd = Some(p);
opts.shell_escape = true;
}
DeterministicModeEnabled => opts.deterministic_mode = true,
}
}

Expand Down

0 comments on commit a3389b9

Please sign in to comment.