Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feature] Start implementing refetch of lost media files via /api/v1/admin/media_refetch #1221

Merged
merged 6 commits into from
Dec 10, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions docs/api/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3209,6 +3209,40 @@ paths:
summary: Clean up remote media older than the specified number of days.
tags:
- admin
/api/v1/admin/media_refetch:
post:
description: |-
Currently, this only includes remote emojis.
This endpoint is useful when data loss has occurred, and you want to try to recover to a working state.
operationId: mediaRefetch
parameters:
- description: Domain to refetch media from. If empty, all domains will be refetched.
in: query
name: domain
type: string
produces:
- application/json
responses:
"202":
description: Request accepted and will be processed. Check the logs for progress / errors.
"400":
description: bad request
"401":
description: unauthorized
"403":
description: forbidden
"404":
description: not found
"406":
description: not acceptable
"500":
description: internal server error
security:
- OAuth2 Bearer:
- admin
summary: Refetch media specified in the database but missing from storage.
tags:
- admin
/api/v1/apps:
post:
consumes:
Expand Down
4 changes: 4 additions & 0 deletions internal/api/client/admin/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ const (
// AccountsActionPath is used for taking action on a single account.
AccountsActionPath = AccountsPathWithID + "/action"
MediaCleanupPath = BasePath + "/media_cleanup"
MediaRefetchPath = BasePath + "/media_refetch"

// ExportQueryKey is for requesting a public export of some data.
ExportQueryKey = "export"
Expand All @@ -63,6 +64,8 @@ const (
MinShortcodeDomainKey = "min_shortcode_domain"
// LimitKey is for specifying maximum number of results to return.
LimitKey = "limit"
// DomainQueryKey is for specifying a domain during admin actions.
DomainQueryKey = "domain"
)

// Module implements the ClientAPIModule interface for admin-related actions (reports, emojis, etc.).
Expand Down Expand Up @@ -90,6 +93,7 @@ func (m *Module) Route(r router.Router) error {
r.AttachHandler(http.MethodDelete, DomainBlocksPathWithID, m.DomainBlockDELETEHandler)
r.AttachHandler(http.MethodPost, AccountsActionPath, m.AccountActionPOSTHandler)
r.AttachHandler(http.MethodPost, MediaCleanupPath, m.MediaCleanupPOSTHandler)
r.AttachHandler(http.MethodPost, MediaRefetchPath, m.MediaRefetchPOSTHandler)
r.AttachHandler(http.MethodGet, EmojiCategoriesPath, m.EmojiCategoriesGETHandler)
return nil
}
93 changes: 93 additions & 0 deletions internal/api/client/admin/mediarefetch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
GoToSocial
Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.

You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package admin

import (
"fmt"
"net/http"

"github.com/gin-gonic/gin"
"github.com/superseriousbusiness/gotosocial/internal/api"
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
"github.com/superseriousbusiness/gotosocial/internal/oauth"
)

// MediaRefetchPOSTHandler swagger:operation POST /api/v1/admin/media_refetch mediaRefetch
//
// Refetch media specified in the database but missing from storage.
// Currently, this only includes remote emojis.
// This endpoint is useful when data loss has occurred, and you want to try to recover to a working state.
//
// ---
// tags:
// - admin
//
// produces:
// - application/json
//
// security:
// - OAuth2 Bearer:
// - admin
//
// parameters:
// -
// name: domain
// in: query
// description: >-
// Domain to refetch media from.
// If empty, all domains will be refetched.
// type: string
//
// responses:
// '202':
// description: >-
// Request accepted and will be processed.
// Check the logs for progress / errors.
// '400':
// description: bad request
// '401':
// description: unauthorized
// '403':
// description: forbidden
// '404':
// description: not found
// '406':
// description: not acceptable
// '500':
// description: internal server error
func (m *Module) MediaRefetchPOSTHandler(c *gin.Context) {
	// Authenticate the request first; any failure here is reported as 401.
	// NOTE(review): the four `true` flags presumably demand a token, application,
	// user and account — confirm against oauth.Authed.
	authed, authErr := oauth.Authed(c, true, true, true, true)
	if authErr != nil {
		unauthorized := gtserror.NewErrorUnauthorized(authErr, authErr.Error())
		api.ErrorHandler(c, unauthorized, m.processor.InstanceGet)
		return
	}

	// Only admins may trigger a refetch; everyone else gets 403.
	if isAdmin := *authed.User.Admin; !isAdmin {
		notAdmin := fmt.Errorf("user %s not an admin", authed.User.ID)
		forbidden := gtserror.NewErrorForbidden(notAdmin, notAdmin.Error())
		api.ErrorHandler(c, forbidden, m.processor.InstanceGet)
		return
	}

	// Hand the (possibly empty) domain query value to the processor and
	// surface any coded error it returns.
	ctx := c.Request.Context()
	domain := c.Query(DomainQueryKey)
	errWithCode := m.processor.AdminMediaRefetch(ctx, authed, domain)
	if errWithCode != nil {
		api.ErrorHandler(c, errWithCode, m.processor.InstanceGet)
		return
	}

	// Nothing to return in the body: reply 202 Accepted.
	c.Status(http.StatusAccepted)
}
29 changes: 25 additions & 4 deletions internal/media/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,15 @@ const UnusedLocalAttachmentCacheDays = 3

// Manager provides an interface for managing media: parsing, storing, and retrieving media objects like photos, videos, and gifs.
type Manager interface {
// Stop stops the underlying worker pool of the manager. It should be called
// when closing GoToSocial in order to cleanly finish any in-progress jobs.
// It will block until workers are finished processing.
Stop() error

/*
PROCESSING FUNCTIONS
*/

// ProcessMedia begins the process of decoding and storing the given data as an attachment.
// It will return a pointer to a ProcessingMedia struct upon which further actions can be performed, such as getting
// the finished media, thumbnail, attachment, etc.
Expand Down Expand Up @@ -75,6 +84,10 @@ type Manager interface {
// RecacheMedia refetches, reprocesses, and recaches an existing attachment that has been uncached via pruneRemote.
RecacheMedia(ctx context.Context, data DataFunc, postData PostDataCallbackFunc, attachmentID string) (*ProcessingMedia, error)

/*
PRUNING FUNCTIONS
*/

// PruneAllRemote prunes all remote media attachments cached on this instance which are older than the given amount of days.
// 'Pruning' in this context means removing the locally stored data of the attachment (both thumbnail and full size),
// and setting 'cached' to false on the associated attachment.
Expand All @@ -98,10 +111,18 @@ type Manager interface {
// is returned to the caller.
PruneOrphaned(ctx context.Context, dry bool) (int, error)

// Stop stops the underlying worker pool of the manager. It should be called
// when closing GoToSocial in order to cleanly finish any in-progress jobs.
// It will block until workers are finished processing.
Stop() error
/*
REFETCHING FUNCTIONS
Useful when data loss has occurred.
*/

// RefetchEmojis iterates through remote emojis (for the given domain, or all if domain is empty string).
//
// For each emoji, the manager will check whether both the full size and static images are present in storage.
// If not, the manager will refetch and reprocess full size and static images for the emoji.
//
// The provided DereferenceMedia function will be used when it's necessary to refetch something this way.
RefetchEmojis(ctx context.Context, domain string, dereferenceMedia DereferenceMedia) (int, error)
}

type manager struct {
Expand Down
17 changes: 11 additions & 6 deletions internal/media/media_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,26 @@ package media_test

import (
"github.com/stretchr/testify/suite"
"github.com/superseriousbusiness/gotosocial/internal/concurrency"
"github.com/superseriousbusiness/gotosocial/internal/db"
gtsmodel "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/media"
"github.com/superseriousbusiness/gotosocial/internal/messages"
"github.com/superseriousbusiness/gotosocial/internal/storage"
"github.com/superseriousbusiness/gotosocial/internal/transport"
"github.com/superseriousbusiness/gotosocial/testrig"
)

// MediaStandardTestSuite bundles the fixtures shared by tests in the media_test
// package: a database handle, a storage driver, a media.Manager, a
// transport.Controller, and maps of test attachments, accounts and emojis.
//
// NOTE(review): this listing appears to come from a diff view whose +/- markers
// were stripped, so several fields are shown twice (old and new alignment) —
// confirm against the real file before relying on the exact field list.
type MediaStandardTestSuite struct {
suite.Suite

db db.DB
storage *storage.Driver
manager media.Manager
testAttachments map[string]*gtsmodel.MediaAttachment
testAccounts map[string]*gtsmodel.Account
testEmojis map[string]*gtsmodel.Emoji
db db.DB
storage *storage.Driver
manager media.Manager
transportController transport.Controller
testAttachments map[string]*gtsmodel.MediaAttachment
testAccounts map[string]*gtsmodel.Account
testEmojis map[string]*gtsmodel.Emoji
}

func (suite *MediaStandardTestSuite) SetupSuite() {
Expand All @@ -53,6 +57,7 @@ func (suite *MediaStandardTestSuite) SetupTest() {
suite.testAccounts = testrig.NewTestAccounts()
suite.testEmojis = testrig.NewTestEmojis()
suite.manager = testrig.NewTestMediaManager(suite.db, suite.storage)
suite.transportController = testrig.NewTestTransportController(testrig.NewMockHTTPClient(nil, "../../testrig/media"), suite.db, concurrency.NewWorkerPool[messages.FromFederator](0, 0))
}

func (suite *MediaStandardTestSuite) TearDownTest() {
Expand Down
Loading