Skip to content

Commit

Permalink
Merge pull request #2564 from sipsma/fix-2557
Browse files Browse the repository at this point in the history
cache: handle crash after snapshot commit
  • Loading branch information
tonistiigi committed Jan 19, 2022
2 parents 17c237d + eb935e5 commit e47e047
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 15 deletions.
51 changes: 36 additions & 15 deletions cache/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,25 +410,32 @@ func (cm *cacheManager) getRecord(ctx context.Context, id string, opts ...RefOpt

if mutableID := md.getEqualMutable(); mutableID != "" {
mutable, err := cm.getRecord(ctx, mutableID)
if err != nil {
// check loading mutable deleted record from disk
if IsNotFound(err) {
if err == nil {
rec := &cacheRecord{
mu: &sync.Mutex{},
cm: cm,
refs: make(map[ref]struct{}),
parentRefs: parents,
cacheMetadata: md,
equalMutable: &mutableRef{cacheRecord: mutable},
}
mutable.equalImmutable = &immutableRef{cacheRecord: rec}
cm.records[id] = rec
return rec, nil
} else if IsNotFound(err) {
// The equal mutable for this ref is not found, check to see if our snapshot exists
if _, statErr := cm.Snapshotter.Stat(ctx, md.getSnapshotID()); statErr != nil {
// this ref's snapshot also doesn't exist, just remove this record
cm.MetadataStore.Clear(id)
return nil, errors.Wrap(errNotFound, id)
}
// Our snapshot exists, so there may have been a crash while finalizing this ref.
// Clear the equal mutable field and continue using this ref.
md.clearEqualMutable()
md.commitMetadata()
} else {
return nil, err
}

rec := &cacheRecord{
mu: &sync.Mutex{},
cm: cm,
refs: make(map[ref]struct{}),
parentRefs: parents,
cacheMetadata: md,
equalMutable: &mutableRef{cacheRecord: mutable},
}
mutable.equalImmutable = &immutableRef{cacheRecord: rec}
cm.records[id] = rec
return rec, nil
}

rec := &cacheRecord{
Expand All @@ -448,6 +455,20 @@ func (cm *cacheManager) getRecord(ctx context.Context, id string, opts ...RefOpt
return nil, errors.Wrapf(errNotFound, "failed to get deleted record %s", id)
}

if rec.mutable {
// If the record is mutable, then the snapshot must exist
if _, err := cm.Snapshotter.Stat(ctx, rec.ID()); err != nil {
if !errdefs.IsNotFound(err) {
return nil, errors.Wrap(err, "failed to check mutable ref snapshot")
}
// the snapshot doesn't exist, clear this record
if err := rec.remove(ctx, true); err != nil {
return nil, errors.Wrap(err, "failed to remove mutable rec with missing snapshot")
}
return nil, errors.Wrap(errNotFound, rec.ID())
}
}

if err := initializeMetadata(rec.cacheMetadata, rec.parentRefs, opts...); err != nil {
return nil, err
}
Expand Down
81 changes: 81 additions & 0 deletions cache/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1684,6 +1684,87 @@ func TestMergeOp(t *testing.T) {
checkDiskUsage(ctx, t, cm, 0, 0)
}

func TestLoadHalfFinalizedRef(t *testing.T) {
// This test simulates the situation where a ref w/ an equalMutable has its
// snapshot committed but there is a crash before the metadata is updated to
// clear the equalMutable field. It's expected that the mutable will be
// removed and the immutable ref will continue to be usable.
t.Parallel()

ctx := namespaces.WithNamespace(context.Background(), "buildkit-test")

tmpdir, err := ioutil.TempDir("", "cachemanager")
require.NoError(t, err)
defer os.RemoveAll(tmpdir)

snapshotter, err := native.NewSnapshotter(filepath.Join(tmpdir, "snapshots"))
require.NoError(t, err)

co, cleanup, err := newCacheManager(ctx, cmOpt{
tmpdir: tmpdir,
snapshotter: snapshotter,
snapshotterName: "native",
})
require.NoError(t, err)
defer cleanup()
cm := co.manager.(*cacheManager)

mref, err := cm.New(ctx, nil, nil, CachePolicyRetain)
require.NoError(t, err)
mutRef := mref.(*mutableRef)

iref, err := mutRef.Commit(ctx)
require.NoError(t, err)
immutRef := iref.(*immutableRef)

require.NoError(t, mref.Release(ctx))

_, err = co.lm.Create(ctx, func(l *leases.Lease) error {
l.ID = immutRef.ID()
l.Labels = map[string]string{
"containerd.io/gc.flat": time.Now().UTC().Format(time.RFC3339Nano),
}
return nil
})
require.NoError(t, err)
err = co.lm.AddResource(ctx, leases.Lease{ID: immutRef.ID()}, leases.Resource{
ID: immutRef.getSnapshotID(),
Type: "snapshots/" + cm.Snapshotter.Name(),
})
require.NoError(t, err)

err = cm.Snapshotter.Commit(ctx, immutRef.getSnapshotID(), mutRef.getSnapshotID())
require.NoError(t, err)

_, err = cm.Snapshotter.Stat(ctx, mutRef.getSnapshotID())
require.Error(t, err)

require.NoError(t, iref.Release(ctx))

require.NoError(t, cm.Close())
require.NoError(t, cleanup())

co, cleanup, err = newCacheManager(ctx, cmOpt{
tmpdir: tmpdir,
snapshotter: snapshotter,
snapshotterName: "native",
})
require.NoError(t, err)
defer cleanup()
cm = co.manager.(*cacheManager)

_, err = cm.GetMutable(ctx, mutRef.ID())
require.ErrorIs(t, err, errNotFound)

iref, err = cm.Get(ctx, immutRef.ID())
require.NoError(t, err)
require.NoError(t, iref.Finalize(ctx))
immutRef = iref.(*immutableRef)

_, err = cm.Snapshotter.Stat(ctx, immutRef.getSnapshotID())
require.NoError(t, err)
}

func TestMountReadOnly(t *testing.T) {
t.Parallel()
if runtime.GOOS != "linux" {
Expand Down

0 comments on commit e47e047

Please sign in to comment.