Skip to content

Commit

Permalink
New function git_timestamp that returns the commit timestamp (#796)
Browse files Browse the repository at this point in the history
* New function git_timestamp that returns the commit timestamp

* Reproduce #780 with a test

* The commit time is available as a Unix timestamp with %ct
  • Loading branch information
mwouts authored Jun 4, 2021
1 parent 38c1e37 commit 4558df9
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 19 deletions.
2 changes: 2 additions & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ Jupytext ChangeLog

**Changed**
- Jupytext CLI has a new option `--use-source-timestamp` that sets the last modification time of the output file equal to that of the source file (this avoids having to change the timestamp of the source file) ([#784](https://github.com/mwouts/jupytext/issues/784))
- In the pre-commit mode, Jupytext now uses the commit timestamp to determine which file in the pair is the most recent ([#780](https://github.com/mwouts/jupytext/issues/780))


**Fixed**
- Dependencies of the JupyterLab extension have been upgraded to fix a security vulnerability ([#783](https://github.com/mwouts/jupytext/issues/783))
Expand Down
58 changes: 42 additions & 16 deletions jupytext/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,43 @@ class InconsistentVersions(ValueError):
"""An error raised when two paired files in the git index contain inconsistent representations"""


def file_in_git_index(path):
    """Tell whether git reports `path` as modified or added.

    Returns False right away when `path` is not an existing file. Otherwise
    `git status --porcelain <path>` is run through `system` (assumed to return
    the command output as text -- confirm against its definition) and the
    result is True when that output, once surrounding whitespace is removed,
    starts with "M" or "A".
    """
    if os.path.isfile(path):
        porcelain = system("git", "status", "--porcelain", path)
        # Stripping first means a status code preceded by a blank also matches
        return porcelain.strip().startswith(("M", "A"))
    return False


def git_timestamp(path):
    """Return a timestamp for `path` that is based on git rather than on the file system.

    The result is, in order of precedence:
    - None when `path` is not an existing file,
    - float("inf") when `file_in_git_index(path)` is true, so that such files
      compare as more recent than any committed file,
    - the value printed by `git log -1 --pretty=%ct <path>` (the commit time of
      the last commit for that path, as a Unix timestamp) converted to float,
    - the value of `get_timestamp(path)` when git prints nothing for that path
      or when the command ends with exit code 128.

    A SystemExit with any other exit code is propagated to the caller.
    """
    if not os.path.isfile(path):
        return None

    # Pending changes always win over anything that was committed
    if file_in_git_index(path):
        return float("inf")

    try:
        commit_time = system("git", "log", "-1", "--pretty=%ct", path).strip()
    except SystemExit as err:
        if err.code != 128:
            raise
        # Exit code 128: git could not provide a log here (the original code
        # labels this case "git not initialized"), so behave as if no commit
        # was found
        commit_time = ""

    # No commit found for this file: fall back to the file timestamp
    return float(commit_time) if commit_time else get_timestamp(path)


def get_timestamp(path):
    """Return the last modification time of `path` (`st_mtime` from `os.lstat`),
    or None when `path` is not an existing file."""
    if os.path.isfile(path):
        return os.lstat(path).st_mtime
    return None


def load_paired_notebook(notebook, fmt, config, formats, nb_file, log, pre_commit_mode):
"""Update the notebook with the inputs and outputs of the most recent paired files"""
if not formats:
Expand All @@ -968,33 +1005,20 @@ def load_paired_notebook(notebook, fmt, config, formats, nb_file, log, pre_commi
_, fmt_with_prefix_suffix = find_base_path_and_format(nb_file, formats)
fmt.update(fmt_with_prefix_suffix)

def file_in_git_index(path):
return system("git", "status", "--porcelain", path).startswith(("M", "A"))

use_git_index_rather_than_timestamp = pre_commit_mode and file_in_git_index(nb_file)

def get_timestamp(path):
if not os.path.isfile(path):
return None
if use_git_index_rather_than_timestamp:
# Files that are in the git index are considered more recent
return file_in_git_index(path)
return os.lstat(path).st_mtime

def read_one_file(path, fmt):
if path == nb_file:
return notebook

log(f"[jupytext] Loading {shlex.quote(path)}")
return read(path, fmt=fmt, config=config)

if use_git_index_rather_than_timestamp:
if pre_commit_mode and file_in_git_index(nb_file):
# We raise an error if two representations of this notebook in the git index are inconsistent
nb_files_in_git_index = sorted(
(
(alt_path, alt_fmt)
for alt_path, alt_fmt in paired_paths(nb_file, fmt, formats)
if get_timestamp(alt_path)
if file_in_git_index(alt_path)
),
key=lambda x: 0 if x[1]["extension"] != ".ipynb" else 1,
)
Expand All @@ -1016,7 +1040,9 @@ def read_one_file(path, fmt):
f" git reset {shlex.quote(path0)} && git checkout -- {shlex.quote(path0)}\n"
)

inputs, outputs = latest_inputs_and_outputs(nb_file, fmt, formats, get_timestamp)
inputs, outputs = latest_inputs_and_outputs(
nb_file, fmt, formats, git_timestamp if pre_commit_mode else get_timestamp
)
notebook = read_pair(inputs, outputs, read_one_file)

return notebook, inputs.path, outputs.path
Expand Down
102 changes: 99 additions & 3 deletions tests/test_pre_commit_mode.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from nbformat.v4.nbbase import new_markdown_cell, new_notebook
import os
import time

from jupytext import write
from jupytext.cli import is_untracked, jupytext
import pytest
from nbformat.v4.nbbase import new_code_cell, new_markdown_cell, new_notebook

from jupytext import read, write
from jupytext.cli import get_timestamp, git_timestamp, is_untracked, jupytext


def test_is_untracked(tmpdir, cwd_tmpdir, tmp_repo):
Expand Down Expand Up @@ -156,3 +160,95 @@ def test_pre_commit_local_config(tmpdir, cwd_tmpdir, tmp_repo, python_notebook,
assert not err, err
assert "updating test" not in out.lower(), out
assert exit_code == 0, out


def test_git_timestamp(tmpdir, cwd_tmpdir, tmp_repo):
    """Follow `git_timestamp` while files are created, staged, committed, touched and modified."""
    infinity = float("inf")
    file_1 = tmpdir.join("file_1")
    file_2 = tmpdir.join("file_2")
    file_3 = tmpdir.join("file_3")

    # Before the first commit, git_timestamp is simply the file timestamp
    file_1.write("")
    assert git_timestamp("file_1") == get_timestamp("file_1")

    # Once staged, both files get an infinite timestamp (i.e. they are more
    # recent than any committed file), whatever their modification times
    time.sleep(0.1)
    file_2.write("")
    tmp_repo.git.add(".")

    assert get_timestamp("file_1") < get_timestamp("file_2")
    assert git_timestamp("file_1") == git_timestamp("file_2") == infinity

    # After the commit both files share the same, finite, commit timestamp,
    # which is close to the modification time of file_1
    tmp_repo.index.commit("Add file_1 and file_2")
    assert get_timestamp("file_1") < get_timestamp("file_2")
    assert git_timestamp("file_1") == git_timestamp("file_2") < infinity
    committed_ts = git_timestamp("file_1")
    assert committed_ts < get_timestamp("file_1") + 1 < committed_ts + 2

    # Commit timestamps have a one-second resolution: wait a bit more than
    # that so that the next commit gets a distinct timestamp
    time.sleep(1.2)

    # Rewriting file_1 with the same content only changes its modification
    # time, not its git timestamp
    file_1.write("")
    assert get_timestamp("file_1") > get_timestamp("file_2")
    assert git_timestamp("file_1") == git_timestamp("file_2") < infinity

    # An actual change in the content makes the git timestamp infinite again...
    file_1.write("modified")
    assert get_timestamp("file_1") > get_timestamp("file_2")
    assert infinity == git_timestamp("file_1") > git_timestamp("file_2")

    # ...and it remains infinite when that change is staged
    tmp_repo.git.add(".")
    assert infinity == git_timestamp("file_1") > git_timestamp("file_2")

    # The new commit gives file_1 a finite timestamp, later than that of file_2
    tmp_repo.index.commit("Update file_1")
    assert infinity > git_timestamp("file_1") > git_timestamp("file_2")

    # For a file that was never added to the repository we get the file timestamp
    file_3.write("")
    assert git_timestamp("file_3") == get_timestamp("file_3")


@pytest.mark.parametrize(
    "commit_order", [["test.py", "test.ipynb"], ["test.ipynb", "test.py"]]
)
@pytest.mark.parametrize("sync_file", ["test.py", "test.ipynb"])
def test_sync_pre_commit_mode_respects_commit_order_780(
    tmpdir,
    cwd_tmpdir,
    tmp_repo,
    python_notebook,
    commit_order,
    sync_file,
):
    """In pre-commit mode, --sync must propagate the content of the file that
    was committed last, even when the file modification times suggest the
    opposite order (issue #780)."""
    first_committed, last_committed = commit_order

    notebook = python_notebook
    notebook.metadata["jupytext"] = {"formats": "ipynb,py:percent"}

    # First commit: the older version of the notebook
    notebook.cells = [new_code_cell("1 + 1")]
    write(notebook, first_committed)
    tmp_repo.git.add(first_committed)
    tmp_repo.index.commit(first_committed)

    # Commit timestamps have a one-second resolution, so we need to wait for
    # more than one second to get two distinct commit timestamps
    time.sleep(1.2)

    # Second commit: the newer version, in the other file of the pair
    notebook.cells = [new_code_cell("2 + 2")]
    write(notebook, last_committed)
    tmp_repo.git.add(last_committed)
    tmp_repo.index.commit(last_committed)

    # Swap the modification times so that they contradict the commit order
    mtime_first = os.stat(first_committed).st_mtime
    mtime_last = os.stat(last_committed).st_mtime
    os.utime(first_committed, (mtime_last, mtime_last))
    os.utime(last_committed, (mtime_first, mtime_first))

    jupytext(["--sync", "--pre-commit-mode", sync_file])

    # Both files must now hold the content of the last commit
    for path in commit_order:
        synced = read(path)
        assert synced.cells[0].source == "2 + 2", path

0 comments on commit 4558df9

Please sign in to comment.