Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: delay cksum calculation for rebuilding src replica and the new rebuilding snap #321

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion pkg/spdk/replica.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@
// Timing constants for replica restore refresh and snapshot checksum scheduling.
const (
// restorePeriodicRefreshInterval is a 2-second period; presumably the polling
// interval for refreshing restore status — its use is not visible here, confirm.
restorePeriodicRefreshInterval = 2 * time.Second

// checksumWaitPeriodAfterRebuilding is a 10-second wait; presumably applied
// before checksum calculation once a rebuild has finished — TODO confirm at the call site.
// NOTE(review): the two identical declarations below appear to be the removed
// and added sides of a diff (realignment only); only one exists in the real file.
checksumWaitPeriodAfterRebuilding = 10 * time.Second
checksumWaitPeriodAfterRebuilding = 10 * time.Second
// checksumWaitPeriodForRebuildingSnapshot is how long Sync keeps skipping the
// checksum calculation for a snapshot lvol whose UserCreated xattr is "false",
// measured from the lvol's SnapshotTimestamp xattr. The delay avoids
// "Device or resource busy" errors while the lvol is still in use by rebuilding.
checksumWaitPeriodForRebuildingSnapshot = 1 * time.Minute
)

type Replica struct {
Expand Down Expand Up @@ -233,25 +234,42 @@
return IsReplicaLvol(r.Name, lvolName) || IsBackingImageSnapLvolName(lvolName)
}

func (r *Replica) Sync(spdkClient *spdkclient.Client) (err error) {
r.Lock()
defer r.Unlock()
// It's better to let the server send the update signal

// This lvol and nvmf subsystem fetch should be protected by the replica lock, in case snapshot operations happen during the sync-up.
bdevLvolMap, err := GetBdevLvolMapWithFilter(spdkClient, r.replicaLvolFilter)
if err != nil {
return err
}

if r.SnapshotChecksumEnabled {
for _, bdevLvol := range bdevLvolMap {
if !bdevLvol.DriverSpecific.Lvol.Snapshot {
continue
}
if bdevLvol.DriverSpecific.Lvol.Xattrs[spdkclient.SnapshotChecksum] != "" {
continue
}
// Avoid error "Device or resource busy" by delaying checksum calculation for all src replica snapshot lvols during rebuilding
// as these lvols may be operated by shallow copy calls later.
if r.rebuildingSrcCache.dstReplicaName != "" {
continue

Check warning on line 259 in pkg/spdk/replica.go

View check run for this annotation

Codecov / codecov/patch

pkg/spdk/replica.go#L258-L259

Added lines #L258 - L259 were not covered by tests
}
// Avoid error "Device or resource busy" by delaying checksum calculation for the newly created rebuilding snapshot lvol
// as this lvol will be exposed to the dst replica soon.
if bdevLvol.DriverSpecific.Lvol.Xattrs[spdkclient.UserCreated] == "false" && bdevLvol.DriverSpecific.Lvol.Xattrs[spdkclient.SnapshotTimestamp] != "" {
snapshotTime, err := time.Parse(time.RFC3339, bdevLvol.DriverSpecific.Lvol.Xattrs[spdkclient.SnapshotTimestamp])
if err != nil {
logrus.WithError(err).Warnf("Failed to parse snapshot timestamp %v for snapshot lvol %v before registering checksum, will skip it", bdevLvol.DriverSpecific.Lvol.Xattrs[spdkclient.SnapshotTimestamp], bdevLvol.Name)
continue

Check warning on line 267 in pkg/spdk/replica.go

View check run for this annotation

Codecov / codecov/patch

pkg/spdk/replica.go#L263-L267

Added lines #L263 - L267 were not covered by tests
}
if !time.Now().After(snapshotTime.Add(checksumWaitPeriodForRebuildingSnapshot)) {
continue

Check warning on line 270 in pkg/spdk/replica.go

View check run for this annotation

Codecov / codecov/patch

pkg/spdk/replica.go#L269-L270

Added lines #L269 - L270 were not covered by tests
}
}
parentBdevLvol := bdevLvolMap[bdevLvol.DriverSpecific.Lvol.BaseSnapshot]
if bdevLvol.DriverSpecific.Lvol.Xattrs[spdkclient.UserCreated] == "false" || (parentBdevLvol != nil && parentBdevLvol.DriverSpecific.Lvol.Xattrs[spdkclient.UserCreated] == "false") {
// Skip the checksum calculation of system created snapshot lvols during rebuilding as they may be purged later.
Expand Down
Loading