Skip to content

Commit

Permalink
Allow sparse option for Kopia & Restic restore
Browse files Browse the repository at this point in the history
Signed-off-by: Ming Qiu <mqiu@vmware.com>
  • Loading branch information
qiuming-best committed Nov 28, 2023
1 parent b276564 commit b57dde1
Show file tree
Hide file tree
Showing 48 changed files with 494 additions and 178 deletions.
1 change: 1 addition & 0 deletions changelogs/unreleased/7141-qiuming-best
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Allow sparse option for Kopia & Restic restore
17 changes: 8 additions & 9 deletions config/crd/v1/bases/velero.io_backups.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ spec:
spec:
description: BackupSpec defines the specification for a Velero backup.
properties:
backupConfig:
description: BackupConfig defines the configuration for the backup.
properties:
parallelFilesUpload:
description: ParallelFilesUpload is the number of files parallel
uploads to perform when using the uploader.
type: integer
type: object
csiSnapshotTimeout:
description: CSISnapshotTimeout specifies the time used to wait for
CSI VolumeSnapshot status turns to ReadyToUse during creation, before
Expand Down Expand Up @@ -477,15 +485,6 @@ spec:
description: TTL is a time.Duration-parseable string describing how
long the Backup should be retained for.
type: string
uploaderConfig:
description: UploaderConfig specifies the configuration for the uploader.
nullable: true
properties:
parallelFilesUpload:
description: ParallelFilesUpload is the number of files parallel
uploads to perform when using the uploader.
type: integer
type: object
volumeSnapshotLocations:
description: VolumeSnapshotLocations is a list containing names of
VolumeSnapshotLocations associated with this backup.
Expand Down
12 changes: 5 additions & 7 deletions config/crd/v1/bases/velero.io_podvolumebackups.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,11 @@ spec:
description: Tags are a map of key-value pairs that should be applied
to the volume backup as tags.
type: object
uploaderConfig:
description: UploaderConfig specifies the configuration for the uploader.
properties:
parallelFilesUpload:
description: ParallelFilesUpload is the number of files parallel
uploads to perform when using the uploader.
type: integer
uploaderSettings:
additionalProperties:
type: string
description: UploaderSettings are a map of key-value pairs that should
be applied to the uploader configuration.
type: object
uploaderType:
description: UploaderType is the type of the uploader to handle the
Expand Down
6 changes: 6 additions & 0 deletions config/crd/v1/bases/velero.io_podvolumerestores.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,12 @@ spec:
description: SourceNamespace is the original namespace for namespace
mapping.
type: string
uploaderSettings:
additionalProperties:
type: string
description: UploaderSettings are a map of key-value pairs that should
be applied to the uploader configuration.
type: object
uploaderType:
description: UploaderType is the type of the uploader to handle the
data transfer.
Expand Down
8 changes: 8 additions & 0 deletions config/crd/v1/bases/velero.io_restores.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,14 @@ spec:
- name
type: object
x-kubernetes-map-type: atomic
restoreConfig:
description: RestoreConfig specifies the configuration for the restore.
properties:
writeSparseFiles:
description: WriteSparseFiles is a flag to indicate whether write
files sparsely or not.
type: boolean
type: object
restorePVs:
description: RestorePVs specifies whether to restore all included
PVs from snapshot
Expand Down
18 changes: 8 additions & 10 deletions config/crd/v1/bases/velero.io_schedules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,14 @@ spec:
description: Template is the definition of the Backup to be run on
the provided schedule
properties:
backupConfig:
description: BackupConfig defines the configuration for the backup.
properties:
parallelFilesUpload:
description: ParallelFilesUpload is the number of files parallel
uploads to perform when using the uploader.
type: integer
type: object
csiSnapshotTimeout:
description: CSISnapshotTimeout specifies the time used to wait
for CSI VolumeSnapshot status turns to ReadyToUse during creation,
Expand Down Expand Up @@ -514,16 +522,6 @@ spec:
description: TTL is a time.Duration-parseable string describing
how long the Backup should be retained for.
type: string
uploaderConfig:
description: UploaderConfig specifies the configuration for the
uploader.
nullable: true
properties:
parallelFilesUpload:
description: ParallelFilesUpload is the number of files parallel
uploads to perform when using the uploader.
type: integer
type: object
volumeSnapshotLocations:
description: VolumeSnapshotLocations is a list containing names
of VolumeSnapshotLocations associated with this backup.
Expand Down
10 changes: 5 additions & 5 deletions config/crd/v1/crds/crds.go

Large diffs are not rendered by default.

8 changes: 0 additions & 8 deletions config/crd/v2alpha1/bases/velero.io_datauploads.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,6 @@ spec:
description: SourcePVC is the name of the PVC which the snapshot is
taken for.
type: string
uploaderConfig:
description: UploaderConfig specifies the configuration for the uploader.
properties:
parallelFilesUpload:
description: ParallelFilesUpload is the number of files parallel
uploads to perform when using the uploader.
type: integer
type: object
required:
- backupStorageLocation
- operationTimeout
Expand Down
2 changes: 1 addition & 1 deletion config/crd/v2alpha1/crds/crds.go

Large diffs are not rendered by default.

41 changes: 40 additions & 1 deletion design/velero-uploader-configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ type UploaderConfig struct {
}
```

### Integration with Backup CRD
### Integration with Backup & Restore CRD
The Velero CLI will support an uploader configuration-related flag, allowing users to set the value when creating backups or restores. This value will be stored in the `UploaderConfig` field within the `Backup` CRD and `Restore` CRD:

```go
Expand Down Expand Up @@ -122,5 +122,44 @@ Roughly, the process is as follows:
3. Each respective controller within the CRs calls the uploader, and the ParallelFilesUpload from UploaderConfig in CRs is passed to the uploader.
4. When the uploader subsequently calls the Kopia API, it can use the ParallelFilesUpload to set the MaxParallelFileReads parameter; if the uploader calls the Restic command instead, it outputs a warning log because Restic does not support this feature.

### Sparse Option For Kopia & Restic Restore
In many system files, there are numerous zero bytes or empty blocks that still occupy physical storage space. Sparse backup employs a more intelligent approach by only backing up the actual data-containing portions. For those empty blocks or zero bytes, it merely records their presence without actually storing them. This can significantly reduce the storage space required for backups, especially in situations where there is a substantial amount of empty data in large file systems.

Below are the key steps that should be added to support this new feature.
#### Velero CLI
The Velero CLI will support a `--write-sparse-files` flag, allowing users to set the `WriteSparseFiles` value when creating restores with Restic or Kopia uploader.

#### UploaderConfig
The sub-option `WriteSparseFiles` is added to UploaderConfig as shown below:

```go
type UploaderConfig struct {
// WriteSparseFiles indicates whether files are written sparsely during restore.
// +optional
WriteSparseFiles bool `json:"writeSparseFiles,omitempty"`
}
```

### Enable Sparse in Restic
For Restic, it can be enabled by passing the `--sparse` flag when running a restore:

```bash
restic restore --sparse --target $targetPath $snapshotID
```

### Enable Sparse in Kopia
For Kopia, this feature can be enabled through the `WriteSparseFiles` field in the [FilesystemOutput](https://pkg.go.dev/github.com/kopia/kopia@v0.13.0/snapshot/restore#FilesystemOutput).

```golang
fsOutput := &restore.FilesystemOutput{
WriteSparseFiles: veleroCfg.WriteSparseFiles,
}
```

Roughly, the process is as follows:
1. Users pass the WriteSparseFiles parameter and its value through the Velero CLI. This parameter and its value are stored as a sub-option within UploaderConfig and then placed into the Restore CR.
2. When users perform file system restores, UploaderConfig is passed to the PodVolumeRestore CR. When users use the Data-mover for restores, it is passed to the DataDownload CR.
3. Each respective controller within the CRs calls the uploader, and the WriteSparseFiles from UploaderConfig in CRs is passed to the uploader.
4. When the uploader subsequently calls the Kopia API, it can use the WriteSparseFiles value to set the `WriteSparseFiles` field of `FilesystemOutput`; if the uploader calls the Restic command instead, it appends the `--sparse` flag to the restore command.

## Alternatives Considered
To enhance extensibility further, the option of storing `UploaderConfig` in a Kubernetes ConfigMap can be explored. This approach would allow configuration options to be added and modified without changing the CRD.
23 changes: 0 additions & 23 deletions pkg/apis/velero/shared/uploader_config.go

This file was deleted.

14 changes: 9 additions & 5 deletions pkg/apis/velero/v1/backup_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ package v1
import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/vmware-tanzu/velero/pkg/apis/velero/shared"
)

type Metadata struct {
Expand Down Expand Up @@ -178,10 +176,16 @@ type BackupSpec struct {
// +optional
DataMover string `json:"datamover,omitempty"`

// UploaderConfig specifies the configuration for the uploader.
// BackupConfig defines the configuration for the backup.
// +optional
// +nullable
UploaderConfig shared.UploaderConfig `json:"uploaderConfig,omitempty"`
BackupConfig *BackupConfig `json:"backupConfig,omitempty"`
}

// BackupConfig defines the configuration for the backup.
// BackupSpec refers to it through its optional, nullable BackupConfig pointer
// field, so the whole structure may be absent from a Backup.
type BackupConfig struct {
// ParallelFilesUpload is the number of files parallel uploads to perform when using the uploader.
// +optional
ParallelFilesUpload int `json:"parallelFilesUpload,omitempty"`
}

// BackupHooks contains custom behaviors that should be executed at different phases of the backup.
Expand Down
6 changes: 4 additions & 2 deletions pkg/apis/velero/v1/pod_volume_backup_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,10 @@ type PodVolumeBackupSpec struct {
// +optional
Tags map[string]string `json:"tags,omitempty"`

// UploaderConfig specifies the configuration for the uploader.
UploaderConfig shared.UploaderConfig `json:"uploaderConfig,omitempty"`
// UploaderSettings are a map of key-value pairs that should be applied to the
// uploader configuration.
// +optional
UploaderSettings map[string]string `json:"uploaderSettings,omitempty"`
}

// PodVolumeBackupPhase represents the lifecycle phase of a PodVolumeBackup.
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/velero/v1/pod_volume_restore_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ type PodVolumeRestoreSpec struct {

// SourceNamespace is the original namespace for namespace mapping.
SourceNamespace string `json:"sourceNamespace"`

// UploaderSettings are a map of key-value pairs that should be applied to the
// uploader configuration.
// +optional
UploaderSettings map[string]string `json:"uploaderSettings,omitempty"`
}

// PodVolumeRestorePhase represents the lifecycle phase of a PodVolumeRestore.
Expand Down
11 changes: 11 additions & 0 deletions pkg/apis/velero/v1/restore_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,17 @@ type RestoreSpec struct {
// +optional
// +nullable
ResourceModifier *v1.TypedLocalObjectReference `json:"resourceModifier,omitempty"`

// RestoreConfig specifies the configuration for the restore.
// +optional
RestoreConfig *RestoreConfig `json:"restoreConfig,omitempty"`
}

// RestoreConfig defines the configuration for the restore.
// RestoreSpec refers to it through its optional RestoreConfig pointer field,
// so the whole structure may be absent from a Restore.
type RestoreConfig struct {
// WriteSparseFiles is a flag to indicate whether write files sparsely or not.
// +optional
WriteSparseFiles bool `json:"writeSparseFiles,omitempty"`
}

// RestoreHooks contains custom behaviors that should be executed during or post restore.
Expand Down
58 changes: 55 additions & 3 deletions pkg/apis/velero/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions pkg/apis/velero/v2alpha1/data_upload_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,6 @@ type DataUploadSpec struct {
// OperationTimeout specifies the time used to wait internal operations,
// before returning error as timeout.
OperationTimeout metav1.Duration `json:"operationTimeout"`

// UploaderConfig specifies the configuration for the uploader.
UploaderConfig shared.UploaderConfig `json:"uploaderConfig,omitempty"`
}

type SnapshotType string
Expand Down
Loading

0 comments on commit b57dde1

Please sign in to comment.