Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/list datasets #415

Merged
merged 6 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .github/integration/tests/tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,26 @@ fi

rm -r test-download

# Check listing files in a dataset
# Both strings have every space, tab, newline and carriage return removed
# before they are compared, so only the sequence of file names and sizes has
# to match; the column layout of the command output is irrelevant.
output=$(./sda-cli list -config testing/s3cmd-download.conf -dataset https://doi.example/ty009.sfrrss/600.45asasga -url http://localhost:8080)
expected="dummy_data.c4gh 1048605 dummy_data2.c4gh 1048605 dummy_data3.c4gh 1048605"
if [[ "${output//[$' \t\n\r']/}" == "${expected//[$' \t\n\r']/}" ]]; then
echo "Successfully listed files in dataset"
else
echo "Failed to list files in dataset"
exit 1
fi

# Check listing datasets
# This is a substring match: the check passes as long as the expected dataset
# identifier appears anywhere in the command output.
output=$(./sda-cli list -config testing/s3cmd-download.conf --datasets -url http://localhost:8080)
expected="https://doi.example/ty009.sfrrss/600.45asasga"
if [[ $output == *"$expected"* ]]; then
echo "Successfully listed datasets"
else
echo "Failed to list datasets"
exit 1
fi

# Download whole dataset by using the sda-download feature
./sda-cli sda-download -config testing/s3cmd-download.conf -dataset-id https://doi.example/ty009.sfrrss/600.45asasga -url http://localhost:8080 -outdir download-dataset --dataset

Expand Down
1 change: 0 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ require (
github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 // indirect
github.com/shabbyrobe/gocovmerge v0.0.0-20180507124511-f6ea450bfb63 // indirect
golang.org/x/crypto v0.23.0 // indirect
golang.org/x/net v0.25.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/tools v0.6.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@ golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1 h1:k/i9J1pBpvlfR+9QsetwPyERs
golang.org/x/exp v0.0.0-20230522175609-2e198f4a06a1/go.mod h1:V1LtkGg67GoY2N1AnLN78QLrzxkLyJw7RJb1gzOOz9w=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190310074541-c10a0554eabf/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac=
golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM=
golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
Expand Down
1 change: 1 addition & 0 deletions helpers/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ func getPositional(args []string) ([]string, []string) {
"--force-unencrypted",
"-force-unencrypted",
"--dataset",
"--datasets",
}
i := 1
var positional []string
Expand Down
64 changes: 58 additions & 6 deletions list/list.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package list

import (
"errors"
"flag"
"fmt"

"strings"

"github.com/NBISweden/sda-cli/helpers"
sdaDownload "github.com/NBISweden/sda-cli/sda_download"
"github.com/inhies/go-bytesize"
)

Expand All @@ -16,13 +16,16 @@ import (
// Usage text that will be displayed as command line help text when using the
// `help list` command
var Usage = `
USAGE: %s list [-config <s3config-file>] [prefix]
USAGE: %s list [-config <s3config-file>] [prefix] (-url <uri> --datasets) (-url <uri> --dataset <dataset-id>)

list:
Lists recursively all files under the user's folder in the Sensitive
Data Archive (SDA). If the [prefix] parameter is used, only the
files under the specified path will be returned. If no config is
specified, the tool will look for a previous session.
specified, the tool will look for a previous session. The --datasets
flag will list all datasets in the user's folder, given the url of
the htsgetserver. The --dataset flag will list all files in the
specified dataset and the dataset size.
`

// ArgHelp is the suffix text that will be displayed after the argument list in
Expand All @@ -38,6 +41,12 @@ var Args = flag.NewFlagSet("list", flag.ExitOnError)
var configPath = Args.String("config", "",
"S3 config file to use for listing.")

// URL holds the value of the -url flag: the url of the sda-download server
// that the dataset listing modes query.
var URL = Args.String("url", "", "The url of the sda-download server")

// datasets holds the -datasets flag; when set, List prints the datasets
// instead of the files under the user's folder.
var datasets = Args.Bool("datasets", false, "List all datasets in the user's folder.")

// dataset holds the -dataset flag; when non-empty, List prints the files of
// that dataset.
var dataset = Args.String("dataset", "", "List all files in the specified dataset.")

// List function lists the contents of an s3
func List(args []string) error {
// Call ParseArgs to take care of all the flag parsing
Expand All @@ -47,9 +56,7 @@ func List(args []string) error {
}

prefix := ""
if len(Args.Args()) > 1 {
return errors.New("failed to parse prefix, only one is allowed")
} else if len(Args.Args()) == 1 {
if len(Args.Args()) == 1 {
prefix = Args.Args()[0]
}

Expand All @@ -63,6 +70,25 @@ func List(args []string) error {
if err != nil {
return err
}
// case datasets
if *datasets {
err := Datasets(config.AccessToken)
if err != nil {
return err
}

return nil
}

// case dataset
if *dataset != "" {
err := DatasetFiles(config.AccessToken)
if err != nil {
return err
}

return nil
}

result, err := helpers.ListFiles(*config, prefix)
if err != nil {
Expand All @@ -76,3 +102,29 @@ func List(args []string) error {

return nil
}

// DatasetFiles prints the files of the dataset selected with the -dataset
// flag, as reported by the server given with the -url flag. Each file is
// written to standard output on its own line as its display file name and
// its decrypted file size. Any error from the lookup is returned unchanged.
func DatasetFiles(token string) error {
	entries, err := sdaDownload.GetFilesInfo(*URL, *dataset, "", token)
	if err != nil {
		return err
	}

	// One output line per file: "<name> \t <size> ".
	for i := range entries {
		fmt.Printf("%s \t %d \n", entries[i].DisplayFileName, entries[i].DecryptedFileSize)
	}

	return nil
}

// Datasets prints the datasets reported by the server given with the -url
// flag, one per line on standard output. Any error from the lookup is
// returned unchanged.
func Datasets(token string) error {
	// Local names deliberately differ from the package-level flag variables
	// `datasets` and `dataset` so that nothing is shadowed here.
	ids, err := sdaDownload.GetDatasets(*URL, token)
	if err != nil {
		return err
	}

	for i := range ids {
		fmt.Printf("%s \n", ids[i])
	}

	return nil
}
8 changes: 0 additions & 8 deletions list/list_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,6 @@ func (suite *TestSuite) TestNoConfig() {
assert.EqualError(suite.T(), err, "failed to load config file, reason: failed to read the configuration file")
}

// TestTooManyArgs calls List with two positional arguments and expects the
// error "failed to parse prefix, only one is allowed".
// NOTE(review): the diff header for this file reports 8 deletions, so this
// test appears to be removed by the change under review — confirm.
func (suite *TestSuite) TestTooManyArgs() {

os.Args = []string{"list", "arg1", "arg2"}

err := List(os.Args)
assert.EqualError(suite.T(), err, "failed to parse prefix, only one is allowed")
}

func (suite *TestSuite) TestFunctionality() {

// Create a fake s3 backend
Expand Down
32 changes: 28 additions & 4 deletions sda_download/sda_download.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ func SdaDownload(args []string) error {

func datasetCase(token string) error {
fmt.Println("Downloading all files in the dataset")
files, err := getFilesInfo(*URL, *datasetID, "", token)
files, err := GetFilesInfo(*URL, *datasetID, "", token)
if err != nil {
return err
}
Expand Down Expand Up @@ -259,10 +259,11 @@ func downloadFile(uri, token, pubKeyBase64, filePath string) error {
// and returns the download URL for the file
func getFileIDURL(baseURL, token, pubKeyBase64, dataset, filename string) (string, error) {
// Get the files of the dataset
datasetFiles, err := getFilesInfo(baseURL, dataset, pubKeyBase64, token)
datasetFiles, err := GetFilesInfo(baseURL, dataset, pubKeyBase64, token)
if err != nil {
return "", err
}

// Get the file ID for the filename
var idx int
switch {
Expand Down Expand Up @@ -293,8 +294,31 @@ func getFileIDURL(baseURL, token, pubKeyBase64, dataset, filename string) (strin
return url, nil
}

// getFilesInfo gets the files of the dataset by using the dataset ID
func getFilesInfo(baseURL, dataset, pubKeyBase64, token string) ([]File, error) {
// GetDatasets fetches the list of datasets from the service at baseURL.
//
// baseURL must parse as a request URI and carry a scheme, otherwise an
// "invalid base URL" error is returned. The list is requested from
// baseURL + "/metadata/datasets" through getResponseBody, with token passed
// along to that helper, and the response body is decoded as a JSON array of
// strings.
//
// On failure the returned slice is nil and the error wraps the underlying
// cause, so callers can inspect it with errors.Is / errors.As.
func GetDatasets(baseURL, token string) ([]string, error) {
	// Sanitize the base_url
	u, err := url.ParseRequestURI(baseURL)
	if err != nil || u.Scheme == "" {
		return nil, fmt.Errorf("invalid base URL")
	}

	// Make the url for listing datasets
	datasetsURL := baseURL + "/metadata/datasets"

	// Get the response body from the datasets API
	allDatasets, err := getResponseBody(datasetsURL, token, "")
	if err != nil {
		return nil, fmt.Errorf("failed to get datasets, reason: %w", err)
	}

	// Parse the JSON response
	var datasets []string
	if err := json.Unmarshal(allDatasets, &datasets); err != nil {
		return nil, fmt.Errorf("failed to parse dataset list JSON, reason: %w", err)
	}

	return datasets, nil
}

// GetFilesInfo gets the files of the dataset by using the dataset ID
func GetFilesInfo(baseURL, dataset, pubKeyBase64, token string) ([]File, error) {
// Sanitize the base_url
u, err := url.ParseRequestURI(baseURL)
if err != nil || u.Scheme == "" {
Expand Down
18 changes: 17 additions & 1 deletion sda_download/sda_download_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ func (suite *TestSuite) TestGetFilesInfo() {
token := suite.accessToken
baseURL := "https://some/url"
datasetID := "test-dataset"
files, err := getFilesInfo(baseURL, datasetID, "", token)
files, err := GetFilesInfo(baseURL, datasetID, "", token)
require.NoError(suite.T(), err)
require.Len(suite.T(), files, 2)
assert.Equal(suite.T(), "file1id", files[0].FileID)
Expand All @@ -279,3 +279,19 @@ func (suite *TestSuite) TestGetFilesInfo() {
assert.Equal(suite.T(), "4b40bd16-9eba-4992-af39-a7f824e612e2", files[1].FileName)
assert.Equal(suite.T(), "TES01", files[1].DatasetID)
}

// TestGetDatasets checks that GetDatasets decodes the JSON array returned by
// the (stubbed) response helper into a slice of dataset identifiers.
func (suite *TestSuite) TestGetDatasets() {
	// Stub getResponseBody for the duration of the test and put getBody back
	// afterwards so other tests are unaffected.
	defer func() { getResponseBody = getBody }()
	getResponseBody = func(_, _, _ string) ([]byte, error) {
		return []byte(`["https://doi.example/ty009.sfrrss/600.45asasga"]`), nil
	}

	// Test
	token := suite.accessToken
	baseURL := "https://some/url"
	datasets, err := GetDatasets(baseURL, token)
	require.NoError(suite.T(), err)
	// assert.Equal takes (expected, actual); keeping that order makes the
	// failure message label the two values correctly.
	assert.Equal(suite.T(), []string{"https://doi.example/ty009.sfrrss/600.45asasga"}, datasets)
}