Skip to content

Commit

Permalink
feat: Delete old dumps except for the last X
Browse files Browse the repository at this point in the history
Introduced retention parameters for the delete command. The delete command now supports the following retention parameters:

- --dry-run: Runs the deletion operation in test mode with verbose output, without actually deleting anything.
- --before-date 2024-08-27T23:50:54+00:00: Deletes dumps older than the specified date. The date must be provided in RFC3339Nano format, for example: 2021-01-01T00:00:00Z.
- --retain-recent 10: Retains the N most recent dumps, where N is specified by the user.
- --retain-for 1w2d3h4m5s6ms7us8ns: Retains dumps for the specified duration. The format supports weeks (w), days (d), hours (h), minutes (m), seconds (s), milliseconds (ms), microseconds (us), and nanoseconds (ns).
- --prune-failed: Prunes (removes) all dumps that have failed.
- --prune-unsafe: Prunes dumps with "unknown-or-failed" statuses. This option only works in conjunction with --prune-failed.

Refactored code and implemented helper functions to support the new features.

Introduced two new statuses: `failed` and `in progress`. A dump is considered `failed` if it lacks a "done" heartbeat or if its last heartbeat is more than 30 minutes old. Heartbeats are recorded every 15 minutes by the dump command while it is in progress. If `greenmask` fails unexpectedly, the heartbeat is never finalized and the dump is therefore marked as `failed`. The `in progress` status indicates that the dump command is still running.

The heartbeat mechanism determines dump status by storing a heartbeat file in the associated dump storage path, which is updated every 15 minutes. If the heartbeat is not updated for more than 30 minutes (twice the interval), the dump is classified as `failed`.

Revised docs.
  • Loading branch information
wwoytenko committed Sep 18, 2024
1 parent ae50218 commit f918a2d
Show file tree
Hide file tree
Showing 22 changed files with 704 additions and 176 deletions.
346 changes: 346 additions & 0 deletions cmd/greenmask/cmd/delete/delete_dump.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,346 @@
// Copyright 2023 Greenmask
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package delete

import (
"cmp"
"context"
"fmt"
"slices"
"time"

"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/spf13/cobra"
gostr "github.com/xhit/go-str2duration/v2"

pgDomains "github.com/greenmaskio/greenmask/internal/domains"
"github.com/greenmaskio/greenmask/internal/storages"
"github.com/greenmaskio/greenmask/internal/storages/builder"
"github.com/greenmaskio/greenmask/internal/utils/dumpstatus"
"github.com/greenmaskio/greenmask/internal/utils/logger"
)

// Command-line flag values for the delete command; they are bound to the
// corresponding flags in init.
var (
// pruneFailed is set by --prune-failed: delete dumps classified as failed.
pruneFailed bool
// pruneUnsafe is set by --prune-unsafe: additionally delete dumps classified
// as "unknown-or-failed". Only valid together with --prune-failed.
pruneUnsafe bool
// dryRun is set by --dry-run: log what would be deleted without deleting.
dryRun bool
// retainRecent is set by --retain-recent: number of completed dumps to keep.
// The flag default is -1, which run treats as "not provided".
retainRecent int
// beforeDate is set by --before-date: cutoff date string parsed as RFC3339Nano.
beforeDate string
// retainFor is set by --retain-for: duration string such as 1w2d3h.
retainFor string
)

var (
	// Cmd is the cobra "delete" command. It configures logging from Config
	// and then hands the optional positional dump ID to run.
	Cmd = &cobra.Command{
		Use:   "delete",
		Short: "delete dump from the storage with a specific ID",
		// No Args validator is installed: the dump ID is optional because the
		// retention flags can select dumps on their own.
		Run: func(_ *cobra.Command, args []string) {
			if err := logger.SetLogLevel(Config.Log.Level, Config.Log.Format); err != nil {
				log.Fatal().Err(err).Msg("")
			}

			// An empty ID tells run that no positional argument was given.
			id := ""
			if len(args) != 0 {
				id = args[0]
			}

			if err := run(id); err != nil {
				log.Fatal().Err(err).Msg("")
			}
		},
	}
	// Config is the configuration object created by pgDomains.NewConfig; its
	// Log and Storage sections are read by this command.
	Config = pgDomains.NewConfig()
)

// run validates the flag combination and executes exactly one deletion
// strategy. Strategies are checked in a fixed priority order and only the
// first match runs:
//
//  1. --retain-for
//  2. --retain-recent (any value other than the -1 flag default)
//  3. --prune-failed (optionally with --prune-unsafe)
//  4. --before-date
//  5. the positional dumpId
//
// Every failure is returned to the caller rather than reported through
// log.Fatal, so the deferred cancel is executed before the process exits.
func run(dumpId string) error {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	st, err := builder.GetStorage(ctx, &Config.Storage, &Config.Log)
	if err != nil {
		return fmt.Errorf("get storage: %w", err)
	}

	if pruneUnsafe && !pruneFailed {
		return fmt.Errorf("--prune-unsafe works only with --prune-failed")
	}

	switch {
	case retainFor != "":
		if err := retainForDumps(ctx, st, retainFor); err != nil {
			return fmt.Errorf("error --retain-for duration: %w", err)
		}
	case retainRecent != -1:
		if err := retainRecentNDumps(ctx, st); err != nil {
			return fmt.Errorf("error retaining the most recent %d dumps: %w", retainRecent, err)
		}
	case pruneFailed:
		if err := pruneFailedDumps(ctx, st, pruneUnsafe); err != nil {
			return fmt.Errorf("error pruning failed dumps: %w", err)
		}
	case beforeDate != "":
		if err := deleteBeforeDate(ctx, st, beforeDate); err != nil {
			return fmt.Errorf("error deleting dumps older than date: %w", err)
		}
	case dumpId != "":
		if err := deleteDump(dumpId); err != nil {
			return fmt.Errorf("error deleting dump: %w", err)
		}
	default:
		// List the flags that actually exist on this command.
		return fmt.Errorf("either --prune-failed, --before-date, --retain-for, --retain-recent or dumpId should be provided")
	}

	return nil
}

// deleteDump removes the single dump whose storage directory name equals
// dumpId. It opens its own storage handle from Config, verifies that the dump
// exists, and then deletes it through deleteDumpById so that the --dry-run
// flag and the deletion log line are honored the same way as for the
// retention strategies.
//
// It returns an error when the storage cannot be opened or listed, when no
// dump with that ID exists, or when the deletion itself fails.
func deleteDump(dumpId string) error {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	st, err := builder.GetStorage(ctx, &Config.Storage, &Config.Log)
	if err != nil {
		return fmt.Errorf("get storage: %w", err)
	}

	_, dirs, err := st.ListDir(ctx)
	if err != nil {
		return fmt.Errorf("list storage directories: %w", err)
	}

	found := slices.ContainsFunc(dirs, func(sst storages.Storager) bool {
		return sst.Dirname() == dumpId
	})
	if !found {
		return fmt.Errorf("dump with id %s was not found", dumpId)
	}

	// Only the ID is known here; Date and Database stay at their zero values.
	if err := deleteDumpById(ctx, st, &Dump{DumpId: dumpId}, dryRun); err != nil {
		return fmt.Errorf("storage error: %w", err)
	}

	return nil
}

// pruneFailedDumps deletes every dump classified as failed and, when
// pruneUnsafe is true, also every dump classified as unknown-or-failed.
// Deletion goes through deleteDumpById with the package-level dryRun flag and
// stops at the first error.
func pruneFailedDumps(ctx context.Context, st storages.Storager, pruneUnsafe bool) error {
	sorted, err := getSortedBackupWithStatuses(ctx, st)
	if err != nil {
		return fmt.Errorf("could not get sorted dumps: %s", err)
	}

	// purge deletes the given dumps in order, aborting on the first failure.
	purge := func(dumps []*Dump) error {
		for _, dump := range dumps {
			if delErr := deleteDumpById(ctx, st, dump, dryRun); delErr != nil {
				return fmt.Errorf("could not delete dump %s: %s", dump.DumpId, delErr)
			}
		}
		return nil
	}

	if err = purge(sorted.Failed); err != nil {
		return err
	}
	if !pruneUnsafe {
		return nil
	}
	return purge(sorted.UnknownOrFailed)
}

// deleteBeforeDate deletes every valid (completed) dump whose start date is
// strictly before the date given in dateStr.
//
// dateStr must be in RFC3339Nano format, e.g. 2021-01-01T00:00:00Z; a parse
// failure is returned as an error. Deletion goes through deleteDumpById with
// the package-level dryRun flag and stops at the first error.
func deleteBeforeDate(ctx context.Context, st storages.Storager, dateStr string) error {
	dt, err := time.Parse(time.RFC3339Nano, dateStr)
	if err != nil {
		return fmt.Errorf("could not parse --before-date date: %w", err)
	}

	e := log.Info().
		Bool("DryRun", dryRun).
		Time("BeforeDate", dt)
	if log.Logger.GetLevel() == zerolog.DebugLevel {
		// At debug level also show the cutoff normalized to UTC.
		e = e.Time("BeforeDateUtc", dt.UTC())
	}
	e.Msg("deleting dumps older than")

	sr, err := getSortedBackupWithStatuses(ctx, st)
	if err != nil {
		return fmt.Errorf("could not get sorted dumps: %w", err)
	}

	for _, d := range sr.Valid {
		if !d.Date.Before(dt) {
			continue
		}
		if err := deleteDumpById(ctx, st, d, dryRun); err != nil {
			return fmt.Errorf("could not delete dump %s: %w", d.DumpId, err)
		}
	}
	return nil
}

// retainForDumps keeps valid (completed) dumps that started within the last
// retainFor duration and deletes the older ones.
//
// retainFor is parsed with go-str2duration (e.g. 1w2d3h4m5s). An unparsable
// or negative duration is returned as an error; a negative duration would put
// the cutoff in the future and delete every dump. The current time is sampled
// once so that the logged cutoff is exactly the cutoff applied to every dump.
// Deletion goes through deleteDumpById with the package-level dryRun flag and
// stops at the first error.
func retainForDumps(ctx context.Context, st storages.Storager, retainFor string) error {
	dur, err := gostr.ParseDuration(retainFor)
	if err != nil {
		return fmt.Errorf("could not parse --retain-for duration: %w", err)
	}
	if dur < 0 {
		return fmt.Errorf("--retain-for duration must not be negative: %s", retainFor)
	}

	now := time.Now()
	fromDate := now.Add(-dur)
	log.Info().
		Bool("DryRun", dryRun).
		Str("Duration", gostr.String(dur)).
		Time("ToDate", now).
		Time("FromDate", fromDate).
		Msg("deleting dumps older than")

	sr, err := getSortedBackupWithStatuses(ctx, st)
	if err != nil {
		return fmt.Errorf("could not get sorted dumps: %w", err)
	}

	for _, d := range sr.Valid {
		// Dumps started after the cutoff are still inside the retention window.
		if d.Date.After(fromDate) {
			continue
		}
		if err := deleteDumpById(ctx, st, d, dryRun); err != nil {
			return fmt.Errorf("could not delete dump %s: %w", d.DumpId, err)
		}
	}
	return nil
}

// retainRecentNDumps keeps the first retainRecent entries of the valid
// (completed) dump list and deletes the rest. The list comes from
// getSortedBackupWithStatuses, which orders dumps by DumpId descending.
//
// A negative retainRecent is rejected: it would otherwise skip nothing and
// delete every valid dump. Deletion goes through deleteDumpById with the
// package-level dryRun flag and stops at the first error.
func retainRecentNDumps(ctx context.Context, st storages.Storager) error {
	if retainRecent < 0 {
		return fmt.Errorf("--retain-recent must not be negative, got %d", retainRecent)
	}

	sr, err := getSortedBackupWithStatuses(ctx, st)
	if err != nil {
		return fmt.Errorf("could not get sorted dumps: %w", err)
	}

	log.Info().
		Int("Kept", retainRecent).
		Bool("DryRun", dryRun).
		Msg("retaining the most recent N dumps")

	for idx, d := range sr.Valid {
		if idx < retainRecent {
			continue
		}
		if err := deleteDumpById(ctx, st, d, dryRun); err != nil {
			return fmt.Errorf("could not delete dump %s: %w", d.DumpId, err)
		}
	}
	return nil
}

// getSortedBackupWithStatuses lists the directories of st, asks dumpstatus for
// the status and metadata of each one, and groups them into done, failed and
// unknown-or-failed dumps. Directories with any other status are dropped.
// A status lookup error is only logged as a warning; processing continues
// with whatever status was returned.
//
// Date and Database are filled in only for done dumps. Each group is sorted
// by DumpId in descending order.
func getSortedBackupWithStatuses(ctx context.Context, st storages.Storager) (*StorageResponse, error) {
	_, entries, err := st.ListDir(ctx)
	if err != nil {
		return nil, err
	}

	resp := &StorageResponse{}
	for _, entry := range entries {
		status, md, statusErr := dumpstatus.GetDumpStatusAndMetadata(ctx, entry)
		if statusErr != nil {
			log.Warn().
				Str("DumpId", entry.Dirname()).
				Err(statusErr).
				Msg("unable to get dump status for dump")
		}

		dump := &Dump{
			DumpId: entry.Dirname(),
			Status: status,
		}
		switch status {
		case dumpstatus.DoneStatusName:
			// Metadata is only consulted for completed dumps.
			dump.Date = md.StartedAt
			dump.Database = md.Header.DbName
			resp.Valid = append(resp.Valid, dump)
		case dumpstatus.FailedStatusName:
			resp.Failed = append(resp.Failed, dump)
		case dumpstatus.UnknownOrFailedStatusName:
			resp.UnknownOrFailed = append(resp.UnknownOrFailed, dump)
		}
	}

	// byDumpIdDesc orders dumps by DumpId, largest first.
	byDumpIdDesc := func(a, b *Dump) int {
		return cmp.Compare(b.DumpId, a.DumpId)
	}
	for _, group := range [][]*Dump{resp.Valid, resp.Failed, resp.UnknownOrFailed} {
		slices.SortFunc(group, byDumpIdDesc)
	}

	return resp, nil
}

// deleteDumpById logs the dump that is about to be removed and, unless dryRun
// is set, deletes it from st via DeleteAll. The log line always carries the
// dump ID; the date and database are added when known, and a UTC date is
// added at debug log level.
//
// It panics when d.DumpId is empty.
func deleteDumpById(ctx context.Context, st storages.Storager, d *Dump, dryRun bool) error {
	if d.DumpId == "" {
		panic("empty dump id")
	}

	event := log.Info().Str("DumpId", d.DumpId)
	if !d.Date.IsZero() {
		event = event.Str("Date", d.Date.String())
	}
	if log.Logger.GetLevel() == zerolog.DebugLevel {
		event = event.Str("DateUTC", d.Date.UTC().String())
	}
	if d.Database != "" {
		event = event.Str("Database", d.Database)
	}

	// In dry-run mode only the log line is produced.
	if dryRun {
		event.Msg("deleting dump (dry-run)")
		return nil
	}

	event.Msg("deleting dump")
	return st.DeleteAll(ctx, d.DumpId)
}

// init registers the delete command's flags and binds them to the
// package-level flag variables.
func init() {
	// General options:
	Cmd.Flags().IntVar(&retainRecent,
		"retain-recent",
		-1, // -1 means "flag not provided"; see run.
		"retain the most recent N completed dumps",
	)
	Cmd.Flags().BoolVar(&pruneFailed,
		"prune-failed",
		false,
		"prune failed dumps",
	)
	Cmd.Flags().StringVar(&beforeDate,
		"before-date",
		"",
		"delete dumps older than the specified date in RFC3339Nano format: 2021-01-01T00:00:00Z",
	)
	Cmd.Flags().StringVar(&retainFor,
		"retain-for",
		"",
		"retain dumps for the specified duration in format: 1w2d3h4m5s6ms7us8ns",
	)
	Cmd.Flags().BoolVar(&pruneUnsafe,
		"prune-unsafe",
		false,
		`prune dumps with "unknown-or-failed" statuses. Works only with --prune-failed`,
	)
	Cmd.Flags().BoolVar(&dryRun,
		"dry-run",
		false,
		"do not delete anything, just show what would be deleted",
	)
}
16 changes: 16 additions & 0 deletions cmd/greenmask/cmd/delete/domains.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package delete

import "time"

// StorageResponse groups the dumps found in a storage by their status, as
// produced by getSortedBackupWithStatuses. Each slice is sorted by DumpId in
// descending order.
type StorageResponse struct {
// Valid holds dumps whose status is dumpstatus.DoneStatusName.
Valid []*Dump
// Failed holds dumps whose status is dumpstatus.FailedStatusName.
Failed []*Dump
// UnknownOrFailed holds dumps whose status is
// dumpstatus.UnknownOrFailedStatusName.
UnknownOrFailed []*Dump
}

// Dump describes one dump directory found in the storage.
type Dump struct {
// DumpId is the name of the dump's directory in the storage.
DumpId string
// Date is the dump's start time taken from its metadata; it is only set for
// dumps with the done status and is the zero time otherwise.
Date time.Time
// Status is the status string reported by dumpstatus for this dump.
Status string
// Database is the database name from the dump metadata header; it is only
// set for dumps with the done status.
Database string
}
Loading

0 comments on commit f918a2d

Please sign in to comment.