Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for resource id to the archiver #2100

Merged
merged 16 commits into from
Oct 1, 2021
Merged
11 changes: 11 additions & 0 deletions changelog/unreleased/archiver-ids.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
Enhancement: Add support for resource id to the archiver

Before the archiver only supported resources provided by a path.
Now also the resources ID are supported in order to specify the content
of the archive to download. The parameters accepted by the archiver
are two: an optional list of `path` (containing the paths of the
resources) and an optional list of `id` (containing the resources IDs
of the resources).

https://github.com/cs3org/reva/pull/2100
https://github.com/cs3org/reva/issues/2097
277 changes: 94 additions & 183 deletions internal/http/services/archiver/archiver.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,187 +22,135 @@ import (
"archive/tar"
"archive/zip"
"context"
"fmt"
"io"
"net/http"
"path"
"strings"
"path/filepath"
"time"

gateway "github.com/cs3org/go-cs3apis/cs3/gateway/v1beta1"
rpc "github.com/cs3org/go-cs3apis/cs3/rpc/v1beta1"
provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1"
"github.com/cs3org/reva/internal/http/services/datagateway"
"github.com/cs3org/reva/pkg/errtypes"
"github.com/cs3org/reva/pkg/rgrpc/todo/pool"
"github.com/cs3org/reva/pkg/rhttp"
"github.com/cs3org/reva/pkg/rhttp/global"
"github.com/cs3org/reva/pkg/sharedconf"
"github.com/cs3org/reva/pkg/storage/utils/downloader"
"github.com/cs3org/reva/pkg/storage/utils/walker"
"github.com/gdexlab/go-render/render"
ua "github.com/mileusna/useragent"
"github.com/mitchellh/mapstructure"
"github.com/rs/zerolog"
)

type svc struct {
config *Config
httpClient *http.Client
gtwClient gateway.GatewayAPIClient
log *zerolog.Logger
}
const (
// ErrMaxFileCount is the error returned when the max files count specified in the config has reached
ErrMaxFileCount = errtypes.InternalError("reached max files count")
// ErrMaxSize is the error returned when the max total files size specified in the config has reached
ErrMaxSize = errtypes.InternalError("reached max total files size")
// ErrEmptyList is the error returned when an empty list is passed when an archiver is created
ErrEmptyList = errtypes.BadRequest("list of files to archive empty")
)

// Config holds the config options that need to be passed down to all ocdav handlers
// Config is the config for the Archiver
type Config struct {
Prefix string `mapstructure:"prefix"`
GatewaySvc string `mapstructure:"gatewaysvc"`
Timeout int64 `mapstructure:"timeout"`
Insecure bool `mapstructure:"insecure"`
MaxNumFiles int64 `mapstructure:"max_num_files"`
MaxSize int64 `mapstructure:"max_size"`
MaxNumFiles int64 `mapstructure:"max_num_files"`
MaxSize int64 `mapstructure:"max_size"`
}

var (
errMaxFileCount = errtypes.InternalError("reached max files count")
errMaxSize = errtypes.InternalError("reached max total files size")
)

func init() {
global.Register("archiver", New)
// Archiver is the struct able to create an archive
type Archiver struct {
files []string
dir string
walker walker.Walker
downloader downloader.Downloader
config Config
}

// New creates a new archiver service
func New(conf map[string]interface{}, log *zerolog.Logger) (global.Service, error) {
c := &Config{}
err := mapstructure.Decode(conf, c)
if err != nil {
return nil, err
// NewArchiver creates a new archiver able to create an archive containing the files in the list
func NewArchiver(files []string, w walker.Walker, d downloader.Downloader, config Config) (*Archiver, error) {
if len(files) == 0 {
return nil, ErrEmptyList
}

c.init()

gtw, err := pool.GetGatewayServiceClient(c.GatewaySvc)
if err != nil {
return nil, err
dir := getDeepestCommonDir(files)
if pathIn(files, dir) {
dir = filepath.Dir(dir)
}

return &svc{
config: c,
gtwClient: gtw,
httpClient: rhttp.GetHTTPClient(
rhttp.Timeout(time.Duration(c.Timeout*int64(time.Second))),
rhttp.Insecure(c.Insecure),
),
log: log,
}, nil
arc := &Archiver{
dir: dir,
files: files,
walker: w,
downloader: d,
config: config,
}
return arc, nil
}

func (c *Config) init() {
if c.Prefix == "" {
c.Prefix = "download_archive"
// pathIn verifies that the path `f`is in the `files`list
func pathIn(files []string, f string) bool {
f = filepath.Clean(f)
for _, file := range files {
if filepath.Clean(file) == f {
return true
}
}

c.GatewaySvc = sharedconf.GetGatewaySVC(c.GatewaySvc)
return false
}

func (s *svc) Handler() http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, r *http.Request) {
// get the dir and files to archive from the URL
ctx := r.Context()
v := r.URL.Query()
if _, ok := v["dir"]; !ok {
rw.WriteHeader(http.StatusBadRequest)
return
}
dir := v["dir"][0]
func getDeepestCommonDir(files []string) string {

names, ok := v["file"]
if !ok {
names = []string{}
}
if len(files) == 0 {
return ""
}

// append to the files name the dir
files := []string{}
for _, f := range names {
p := path.Join(dir, f)
files = append(files, strings.TrimSuffix(p, "/"))
}
// find the maximum common substring from left
res := path.Clean(files[0]) + "/"

userAgent := ua.Parse(r.Header.Get("User-Agent"))
for _, file := range files[1:] {
file = path.Clean(file) + "/"

archiveName := "download"
if len(files) == 0 {
// we need to archive the whole dir
files = append(files, dir)
archiveName = path.Base(dir)
if len(file) < len(res) {
res, file = file, res
}

if userAgent.OS == ua.Windows {
archiveName += ".zip"
} else {
archiveName += ".tar"
for i := 0; i < len(res); i++ {
if res[i] != file[i] {
res = res[:i]
}
}

s.log.Debug().Msg("Requested the following files/folders to archive: " + render.Render(files))

rw.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", archiveName))
rw.Header().Set("Content-Transfer-Encoding", "binary")
}

var err error
if userAgent.OS == ua.Windows {
err = s.createZip(ctx, dir, files, rw)
} else {
err = s.createTar(ctx, dir, files, rw)
}
if err == errMaxFileCount || err == errMaxSize {
s.log.Error().Msg(err.Error())
rw.WriteHeader(http.StatusRequestEntityTooLarge)
return
}
if err != nil {
s.log.Error().Msg(err.Error())
rw.WriteHeader(http.StatusInternalServerError)
return
// the common substring could be between two / - inside a file name
for i := len(res) - 1; i >= 0; i-- {
if res[i] == '/' {
res = res[:i+1]
break
}

})
}

func (s *svc) Prefix() string {
return s.config.Prefix
}

func (s *svc) Close() error {
return nil
}

func (s *svc) Unprotected() []string {
return nil
}
return filepath.Clean(res)
}

// create a new tar containing the files in the `files` list, which are in the directory `dir`
func (s *svc) createTar(ctx context.Context, dir string, files []string, dst io.Writer) error {
// CreateTar creates a tar and write it into the dst Writer
func (a *Archiver) CreateTar(ctx context.Context, dst io.Writer) error {
w := tar.NewWriter(dst)

var filesCount, sizeFiles int64

for _, root := range files {
for _, root := range a.files {

err := walker.Walk(ctx, root, s.gtwClient, func(path string, info *provider.ResourceInfo, err error) error {
err := a.walker.Walk(ctx, root, func(path string, info *provider.ResourceInfo, err error) error {
if err != nil {
return err
}

filesCount++
if filesCount > s.config.MaxNumFiles {
return errMaxFileCount
if filesCount > a.config.MaxNumFiles {
return ErrMaxFileCount
}
sizeFiles += int64(info.Size)
if sizeFiles > s.config.MaxSize {
return errMaxSize
if sizeFiles > a.config.MaxSize {
return ErrMaxSize
}

fileName := strings.TrimPrefix(path, dir)
// TODO (gdelmont): remove duplicates if the resources requested overlaps
fileName, err := filepath.Rel(a.dir, path)

if err != nil {
return err
}

header := tar.Header{
Name: fileName,
Expand All @@ -227,7 +175,7 @@ func (s *svc) createTar(ctx context.Context, dir string, files []string, dst io.
}

if !isDir {
err = s.downloadFile(ctx, path, w)
err = a.downloader.Download(ctx, path, w)
if err != nil {
return err
}
Expand All @@ -243,29 +191,33 @@ func (s *svc) createTar(ctx context.Context, dir string, files []string, dst io.
return w.Close()
}

// create a new zip containing the files in the `files` list, which are in the directory `dir`
func (s *svc) createZip(ctx context.Context, dir string, files []string, dst io.Writer) error {
// CreateZip creates a zip and write it into the dst Writer
func (a *Archiver) CreateZip(ctx context.Context, dst io.Writer) error {
w := zip.NewWriter(dst)

var filesCount, sizeFiles int64

for _, root := range files {
for _, root := range a.files {

err := walker.Walk(ctx, root, s.gtwClient, func(path string, info *provider.ResourceInfo, err error) error {
err := a.walker.Walk(ctx, root, func(path string, info *provider.ResourceInfo, err error) error {
if err != nil {
return err
}

filesCount++
if filesCount > s.config.MaxNumFiles {
return errMaxFileCount
if filesCount > a.config.MaxNumFiles {
return ErrMaxFileCount
}
sizeFiles += int64(info.Size)
if sizeFiles > s.config.MaxSize {
return errMaxSize
if sizeFiles > a.config.MaxSize {
return ErrMaxSize
}

fileName := strings.TrimPrefix(strings.Trim(path, dir), "/")
// TODO (gdelmont): remove duplicates if the resources requested overlaps
fileName, err := filepath.Rel(a.dir, path)
if err != nil {
return err
}

if fileName == "" {
return nil
Expand All @@ -290,7 +242,7 @@ func (s *svc) createZip(ctx context.Context, dir string, files []string, dst io.
}

if !isDir {
err = s.downloadFile(ctx, path, dst)
err = a.downloader.Download(ctx, path, dst)
if err != nil {
return err
}
Expand All @@ -305,44 +257,3 @@ func (s *svc) createZip(ctx context.Context, dir string, files []string, dst io.
}
return w.Close()
}

func (s *svc) downloadFile(ctx context.Context, path string, dst io.Writer) error {
downResp, err := s.gtwClient.InitiateFileDownload(ctx, &provider.InitiateFileDownloadRequest{
Ref: &provider.Reference{
Path: path,
},
})

switch {
case err != nil:
return err
case downResp.Status.Code != rpc.Code_CODE_OK:
return errtypes.InternalError(downResp.Status.Message)
}

var endpoint, token string
for _, p := range downResp.Protocols {
if p.Protocol == "simple" {
endpoint, token = p.DownloadEndpoint, p.Token
}
}

httpReq, err := rhttp.NewRequest(ctx, http.MethodGet, endpoint, nil)
if err != nil {
return err
}
httpReq.Header.Set(datagateway.TokenTransportHeader, token)

httpRes, err := s.httpClient.Do(httpReq)
if err != nil {
return err
}
defer httpRes.Body.Close()

if httpRes.StatusCode != http.StatusOK {
return errtypes.InternalError(httpRes.Status)
}

_, err = io.Copy(dst, httpRes.Body)
return err
}
Loading