From 02761eb97d0ea2a0e32f5e028fc68ea0e91c531a Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" Date: Mon, 26 Feb 2024 16:39:48 +0100 Subject: [PATCH 1/8] Add test - not passing --- datasetIngestor/checkMetadata_test.go | 102 ++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 datasetIngestor/checkMetadata_test.go diff --git a/datasetIngestor/checkMetadata_test.go b/datasetIngestor/checkMetadata_test.go new file mode 100644 index 0000000..22c0a35 --- /dev/null +++ b/datasetIngestor/checkMetadata_test.go @@ -0,0 +1,102 @@ +package datasetIngestor + +import ( + "encoding/json" + "io/ioutil" + "io" + "net/http" + "strings" + "testing" + "os" +) + +func TestGetHost(t *testing.T) { + host := getHost() + + if len(host) == 0 { + t.Errorf("getHost() returned an empty string") + } + + if host == "unknown" { + t.Errorf("getHost() was unable to get the hostname") + } +} + +func TestCheckMetadata(t *testing.T) { + // Mock HTTP client + mockClient := &http.Client{ + Transport: RoundTripFunc(func(req *http.Request) *http.Response { + // Prepare a mock response for the HTTP client + return &http.Response{ + StatusCode: 200, + Body: io.NopCloser(strings.NewReader(`{"valid":true}`)), + Header: make(http.Header), + } + }), + } + + // Mock user map + mockUser := map[string]string{ + "displayName": "testuser", + "mail": "testuser@example.com", + } + + // Mock access groups + mockAccessGroups := []string{"group1", "group2"} + + // Mock metadata file content + mockMetadata := map[string]interface{}{ + "type": "raw", + // Add other required fields as needed for testing + } + + // Convert metadata to JSON + mockMetadataJSON, err := json.Marshal(mockMetadata) + if err != nil { + t.Fatalf("Error marshaling mock metadata: %v", err) + } + + // Create a temporary file for mock metadata + tmpfile, err := ioutil.TempFile("", "mockmetadata.json") + if err != nil { + t.Fatalf("Error creating temporary file: %v", err) + } + defer tmpfile.Close() + defer func() { + // Clean up temporary file + if err := tmpfile.Close(); err != nil { + t.Fatalf("Error closing temporary file: %v", err) + } + if err := os.Remove(tmpfile.Name()); err != nil { + t.Fatalf("Error removing temporary file: %v", err) + } + }() + + // Write mock metadata JSON to the temporary file + if _, err := tmpfile.Write(mockMetadataJSON); err != nil { + t.Fatalf("Error writing mock metadata to temporary file: %v", err) + } + + // Call the function with mock parameters + metaDataMap, sourceFolder, beamlineAccount := CheckMetadata(mockClient, "http://example.com/api", tmpfile.Name(), mockUser, mockAccessGroups) + + // Add assertions here based on the expected behavior of the function + // For example: + if len(metaDataMap) == 0 { + t.Error("Expected non-empty metadata map") + } + if sourceFolder == "" { + t.Error("Expected non-empty source folder") + } + if !beamlineAccount { + t.Error("Expected beamline account to be true") + } +} + +// RoundTripFunc type is a custom implementation of http.RoundTripper +type RoundTripFunc func(req *http.Request) *http.Response + +// RoundTrip executes a single HTTP transaction, returning a Response for the provided Request. +func (f RoundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { + return f(req), nil +} From 629d0724207b9e61aee7f7e51aad799d1a51fcea Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" <3798865+kavir1698@users.noreply.github.com> Date: Fri, 22 Mar 2024 13:21:18 +0100 Subject: [PATCH 2/8] Add comments and testdata dir --- datasetIngestor/checkMetadata.go | 32 ++++++++++++++------ datasetIngestor/checkMetadata_test.go | 4 +++ datasetIngestor/testdata/metadata-short.json | 8 +++++ datasetIngestor/testdata/metadata.json | 20 ++++++++++++ 4 files changed, 54 insertions(+), 10 deletions(-) create mode 100644 datasetIngestor/testdata/metadata-short.json create mode 100644 datasetIngestor/testdata/metadata.json diff --git a/datasetIngestor/checkMetadata.go b/datasetIngestor/checkMetadata.go index d9fc553..ff2da94 100644 --- a/datasetIngestor/checkMetadata.go +++ b/datasetIngestor/checkMetadata.go @@ -18,7 +18,10 @@ import ( const DUMMY_TIME = "2300-01-01T11:11:11.000Z" const DUMMY_OWNER = "x12345" +// getHost is a function that attempts to retrieve and return the fully qualified domain name (FQDN) of the current host. +// If it encounters any error during the process, it gracefully falls back to returning the simple hostname or "unknown". func getHost() string { + // Try to get the hostname of the current machine. hostname, err := os.Hostname() if err != nil { return "unknown" @@ -46,27 +49,34 @@ func getHost() string { return hostname } -func CheckMetadata(client *http.Client, APIServer string, metadatafile string, user map[string]string, - accessGroups []string) (metaDataMap map[string]interface{}, sourceFolder string, beamlineAccount bool) { +// CheckMetadata is a function that validates and augments metadata for a dataset. +// It takes an HTTP client, an API server URL, a metadata file path, a user map, and a list of access groups as input. +// It returns a map of metadata, a source folder string, and a boolean indicating whether the dataset belongs to a beamline account. +func CheckMetadata(client *http.Client, APIServer string, metadatafile string, user map[string]string, accessGroups []string) (metaDataMap map[string]interface{}, sourceFolder string, beamlineAccount bool) { - // read full meta data + // Read the full metadata from the file. b, err := ioutil.ReadFile(metadatafile) // just pass the file name if err != nil { log.Fatal(err) } - var metadataObj interface{} + // Unmarshal the JSON metadata into an interface{} object. + var metadataObj interface{} // Using interface{} allows metadataObj to hold any type of data, since it has no defined methods. err = json.Unmarshal(b, &metadataObj) if err != nil { log.Fatal(err) } - // use type assertion to access f's underlying map - metaDataMap = metadataObj.(map[string]interface{}) + + // Use type assertion to convert the interface{} object to a map[string]interface{}. + metaDataMap = metadataObj.(map[string]interface{}) // `.(` is type assertion: a way to extract the underlying value of an interface and check whether it's of a specific type. beamlineAccount = false + // If the user is not the ingestor, check whether any of the accessGroups equal the ownerGroup. Otherwise, check for beamline-specific accounts. if user["displayName"] != "ingestor" { - if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { + // Check if the metadata contains the "ownerGroup" key. + if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { // type assertion with a comma-ok idiom validOwner := false + // Iterate over accessGroups to validate the owner group. for _, b := range accessGroups { if b == ownerGroup { validOwner = true @@ -76,13 +86,15 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u if validOwner { log.Printf("OwnerGroup information %s verified successfully.\n", ownerGroup) } else { - // check for beamline specific account if raw data + // If the owner group is not valid, check for beamline-specific accounts. if creationLocation, ok := metaDataMap["creationLocation"]; ok { + // Split the creationLocation string to extract beamline-specific information. parts := strings.Split(creationLocation.(string), "/") expectedAccount := "" if len(parts) == 4 { expectedAccount = strings.ToLower(parts[2]) + strings.ToLower(parts[3]) } + // If the user matches the expected beamline account, grant ingest access. if user["displayName"] == expectedAccount { log.Printf("Beamline specific dataset %s - ingest granted.\n", expectedAccount) beamlineAccount = true @@ -91,7 +103,7 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u } } else { // for other data just check user name - // this is a quick and dirty test. Should be replaced by test for "globalaccess" role + // this is a quick and dirty test. Should be replaced by test for "globalaccess" role. TODO // facilities: ["SLS", "SINQ", "SWISSFEL", "SmuS"], u := user["displayName"] if strings.HasPrefix(u, "sls") || @@ -131,7 +143,7 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u log.Printf("sourceFolderHost field added: %s", metaDataMap["sourceFolderHost"]) } } - // far raw data add PI if missing + // for raw data add PI if missing if val, ok := metaDataMap["type"]; ok { dstype := val.(string) if dstype == "raw" { diff --git a/datasetIngestor/checkMetadata_test.go b/datasetIngestor/checkMetadata_test.go index 22c0a35..d48b92d 100644 --- a/datasetIngestor/checkMetadata_test.go +++ b/datasetIngestor/checkMetadata_test.go @@ -10,13 +10,17 @@ import ( "os" ) +// TestGetHost is a test function for the getHost function. func TestGetHost(t *testing.T) { + // Call the function under test. host := getHost() + // fail the test and report an error if the returned hostname is an empty string. if len(host) == 0 { t.Errorf("getHost() returned an empty string") } + // fail the test and report an error if the returned hostname is "unknown". if host == "unknown" { t.Errorf("getHost() was unable to get the hostname") } diff --git a/datasetIngestor/testdata/metadata-short.json b/datasetIngestor/testdata/metadata-short.json new file mode 100644 index 0000000..138963a --- /dev/null +++ b/datasetIngestor/testdata/metadata-short.json @@ -0,0 +1,8 @@ +{ + "principalInvestigator":"egon.meier@psi.ch", + "creationLocation":"/PSI/SLS/CSAXS", + "sourceFolder": "/tmp/gnome", + "owner": "Andreas Menzel", + "type": "raw", + "ownerGroup": "p17880" +} diff --git a/datasetIngestor/testdata/metadata.json b/datasetIngestor/testdata/metadata.json new file mode 100644 index 0000000..3f69f39 --- /dev/null +++ b/datasetIngestor/testdata/metadata.json @@ -0,0 +1,20 @@ +{ + "creationLocation": "/PSI/SLS/CSAXS", + "datasetName": "CMakeCache", + "description": "", + "owner": "Ana Diaz", + "ownerEmail": "ana.diaz@psi.ch", + "ownerGroup": "p17301", + "principalInvestigator": "ana.diaz@psi.ch", + "scientificMetadata": [ + { + "sample": { + "description": "", + "name": "", + "principalInvestigator": "" + } + } + ], + "sourceFolder": "/usr/share/gnome", + "type": "raw" +} From 38b5d63f91ba751a04610a5aa5e2a7ca3707f811 Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" <3798865+kavir1698@users.noreply.github.com> Date: Fri, 22 Mar 2024 15:40:05 +0100 Subject: [PATCH 3/8] Make the tests pass. --- datasetIngestor/checkMetadata_test.go | 90 ++++++++------------------ datasetIngestor/export_test.go | 4 ++ datasetIngestor/testdata/metadata.json | 2 +- go.mod | 1 + go.sum | 3 +- 5 files changed, 34 insertions(+), 66 deletions(-) create mode 100644 datasetIngestor/export_test.go diff --git a/datasetIngestor/checkMetadata_test.go b/datasetIngestor/checkMetadata_test.go index d48b92d..0e8ee9f 100644 --- a/datasetIngestor/checkMetadata_test.go +++ b/datasetIngestor/checkMetadata_test.go @@ -1,19 +1,15 @@ package datasetIngestor import ( - "encoding/json" - "io/ioutil" - "io" "net/http" - "strings" "testing" - "os" + "time" + "reflect" ) -// TestGetHost is a test function for the getHost function. func TestGetHost(t *testing.T) { // Call the function under test. - host := getHost() + host := GetHost() // fail the test and report an error if the returned hostname is an empty string. if len(host) == 0 { @@ -27,80 +23,46 @@ func TestGetHost(t *testing.T) { } func TestCheckMetadata(t *testing.T) { + // Define mock parameters for the function + var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" // "https://example.com/api" + var APIServer = TEST_API_SERVER + var metadatafile1 = "testdata/metadata.json" + // var metadatafile2 = "testdata/metadata-short.json" + // Mock HTTP client - mockClient := &http.Client{ - Transport: RoundTripFunc(func(req *http.Request) *http.Response { - // Prepare a mock response for the HTTP client - return &http.Response{ - StatusCode: 200, - Body: io.NopCloser(strings.NewReader(`{"valid":true}`)), - Header: make(http.Header), + client := &http.Client{ + Timeout: 5 * time.Second, // Set a timeout for requests + Transport: &http.Transport{ + // Customize the transport settings if needed (e.g., proxy, TLS config) + // For a dummy client, default settings are usually sufficient + }, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + // Customize how redirects are handled if needed + // For a dummy client, default behavior is usually sufficient + return http.ErrUseLastResponse // Use the last response for redirects + }, } - }), - } // Mock user map - mockUser := map[string]string{ - "displayName": "testuser", + user := map[string]string{ + "displayName": "csaxsswissfel", "mail": "testuser@example.com", } // Mock access groups - mockAccessGroups := []string{"group1", "group2"} - - // Mock metadata file content - mockMetadata := map[string]interface{}{ - "type": "raw", - // Add other required fields as needed for testing - } - - // Convert metadata to JSON - mockMetadataJSON, err := json.Marshal(mockMetadata) - if err != nil { - t.Fatalf("Error marshaling mock metadata: %v", err) - } - - // Create a temporary file for mock metadata - tmpfile, err := ioutil.TempFile("", "mockmetadata.json") - if err != nil { - t.Fatalf("Error creating temporary file: %v", err) - } - defer tmpfile.Close() - defer func() { - // Clean up temporary file - if err := tmpfile.Close(); err != nil { - t.Fatalf("Error closing temporary file: %v", err) - } - if err := os.Remove(tmpfile.Name()); err != nil { - t.Fatalf("Error removing temporary file: %v", err) - } - }() - - // Write mock metadata JSON to the temporary file - if _, err := tmpfile.Write(mockMetadataJSON); err != nil { - t.Fatalf("Error writing mock metadata to temporary file: %v", err) - } + accessGroups := []string{"group1", "p17301"} // Call the function with mock parameters - metaDataMap, sourceFolder, beamlineAccount := CheckMetadata(mockClient, "http://example.com/api", tmpfile.Name(), mockUser, mockAccessGroups) + metaDataMap, sourceFolder, beamlineAccount := CheckMetadata(client, APIServer, metadatafile1, user, accessGroups) // Add assertions here based on the expected behavior of the function - // For example: if len(metaDataMap) == 0 { t.Error("Expected non-empty metadata map") } if sourceFolder == "" { t.Error("Expected non-empty source folder") } - if !beamlineAccount { - t.Error("Expected beamline account to be true") + if reflect.TypeOf(beamlineAccount).Kind() != reflect.Bool { + t.Error("Expected beamlineAccount to be boolean") } } - -// RoundTripFunc type is a custom implementation of http.RoundTripper -type RoundTripFunc func(req *http.Request) *http.Response - -// RoundTrip executes a single HTTP transaction, returning a Response for the provided Request. -func (f RoundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { - return f(req), nil -} diff --git a/datasetIngestor/export_test.go b/datasetIngestor/export_test.go new file mode 100644 index 0000000..760dadd --- /dev/null +++ b/datasetIngestor/export_test.go @@ -0,0 +1,4 @@ +// This file exports internal functions for testing purposes. Since the name of the file ends with "_test.go", it will not be included in the final build of the application. +package datasetIngestor + +var GetHost = getHost \ No newline at end of file diff --git a/datasetIngestor/testdata/metadata.json b/datasetIngestor/testdata/metadata.json index 3f69f39..100db21 100644 --- a/datasetIngestor/testdata/metadata.json +++ b/datasetIngestor/testdata/metadata.json @@ -1,5 +1,5 @@ { - "creationLocation": "/PSI/SLS/CSAXS", + "creationLocation": "/PSI/SLS/CSAXS/SWISSFEL", "datasetName": "CMakeCache", "description": "", "owner": "Ana Diaz", diff --git a/go.mod b/go.mod index 61040a7..3eeddb8 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( require ( github.com/creack/pty v1.1.17 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect github.com/mattn/go-colorable v0.1.9 // indirect github.com/mattn/go-isatty v0.0.14 // indirect golang.org/x/sys v0.2.0 // indirect diff --git a/go.sum b/go.sum index 00500b7..4887fa9 100644 --- a/go.sum +++ b/go.sum @@ -2,8 +2,9 @@ github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2 h1:+vx7roKuyA63n github.com/Netflix/go-expect v0.0.0-20220104043353-73e0943537d2/go.mod h1:HBCaDeC1lPdgDeDbhX8XFpy1jqjK0IBG8W5K+xYqA0w= github.com/creack/pty v1.1.17 h1:QeVUsEDNrLBW4tMgZHvxy18sKtr6VI492kBhUfhDJNI= github.com/creack/pty v1.1.17/go.mod h1:MOBLtS5ELjhRRrroQr9kyvTxUAFNvYEK993ew/Vr4O4= -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fatih/color v1.13.0 h1:8LOYc1KYPPmyKMuN8QV2DNRWNbLo6LZ0iLs8+mlH53w= github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 h1:Z9n2FFNUXsshfwJMBgNA0RU6/i7WVaAegv3PtuIHPMs= From 3c7a584def4f406a306a2981611d18c8f081d127 Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" <3798865+kavir1698@users.noreply.github.com> Date: Fri, 22 Mar 2024 16:06:01 +0100 Subject: [PATCH 4/8] Add more tests --- datasetIngestor/checkMetadata_test.go | 54 +++++++++++++++++++- datasetIngestor/export_test.go | 2 +- datasetIngestor/testdata/metadata-short.json | 2 +- 3 files changed, 54 insertions(+), 4 deletions(-) diff --git a/datasetIngestor/checkMetadata_test.go b/datasetIngestor/checkMetadata_test.go index 0e8ee9f..955b5cd 100644 --- a/datasetIngestor/checkMetadata_test.go +++ b/datasetIngestor/checkMetadata_test.go @@ -27,7 +27,7 @@ func TestCheckMetadata(t *testing.T) { var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" // "https://example.com/api" var APIServer = TEST_API_SERVER var metadatafile1 = "testdata/metadata.json" - // var metadatafile2 = "testdata/metadata-short.json" + var metadatafile2 = "testdata/metadata-short.json" // Mock HTTP client client := &http.Client{ @@ -50,7 +50,7 @@ func TestCheckMetadata(t *testing.T) { } // Mock access groups - accessGroups := []string{"group1", "p17301"} + accessGroups := []string{"p17880", "p17301"} // Call the function with mock parameters metaDataMap, sourceFolder, beamlineAccount := CheckMetadata(client, APIServer, metadatafile1, user, accessGroups) @@ -62,7 +62,57 @@ func TestCheckMetadata(t *testing.T) { if sourceFolder == "" { t.Error("Expected non-empty source folder") } + if sourceFolder != "/usr/share/gnome" { + t.Error("sourceFolder should be '/usr/share/gnome'") + } if reflect.TypeOf(beamlineAccount).Kind() != reflect.Bool { t.Error("Expected beamlineAccount to be boolean") } + if beamlineAccount != false { + t.Error("Expected beamlineAccount to be false") + } + if _, ok := metaDataMap["ownerEmail"]; !ok { + t.Error("metaDataMap missing required key 'ownerEmail'") + } + if _, ok := metaDataMap["principalInvestigator"]; !ok { + t.Error("metaDataMap missing required key 'principalInvestigator'") + } + if _, ok := metaDataMap["scientificMetadata"]; !ok { + t.Error("metaDataMap missing required key 'scientificMetadata'") + } + scientificMetadata, ok := metaDataMap["scientificMetadata"].([]interface{}) + if ok { + firstEntry := scientificMetadata[0].(map[string]interface{}) + sample, ok := firstEntry["sample"].(map[string]interface{}) + if ok { + if _, ok := sample["name"]; !ok { + t.Error("Sample is missing 'name' field") + } + if _, ok := sample["description"]; !ok { + t.Error("Sample is missing 'description' field") + } + } + } else { + t.Error("scientificMetadata is not a list") + } + + // test with the second metadata file + metaDataMap2, sourceFolder2, beamlineAccount2 := CheckMetadata(client, APIServer, metadatafile2, user, accessGroups) + + // Add assertions here based on the expected behavior of the function + if len(metaDataMap2) == 0 { + t.Error("Expected non-empty metadata map") + } + if sourceFolder2 == "" { + t.Error("Expected non-empty source folder") + } + if sourceFolder2 != "/tmp/gnome" { + t.Error("sourceFolder should be '/tmp/gnome'") + } + if reflect.TypeOf(beamlineAccount2).Kind() != reflect.Bool { + t.Error("Expected beamlineAccount to be boolean") + } + if beamlineAccount2 != false { + t.Error("Expected beamlineAccount to be false") + } } diff --git a/datasetIngestor/export_test.go b/datasetIngestor/export_test.go index 760dadd..0b97c80 100644 --- a/datasetIngestor/export_test.go +++ b/datasetIngestor/export_test.go @@ -1,4 +1,4 @@ // This file exports internal functions for testing purposes. Since the name of the file ends with "_test.go", it will not be included in the final build of the application. package datasetIngestor -var GetHost = getHost \ No newline at end of file +var GetHost = getHost diff --git a/datasetIngestor/testdata/metadata-short.json b/datasetIngestor/testdata/metadata-short.json index 138963a..06b802c 100644 --- a/datasetIngestor/testdata/metadata-short.json +++ b/datasetIngestor/testdata/metadata-short.json @@ -1,6 +1,6 @@ { "principalInvestigator":"egon.meier@psi.ch", - "creationLocation":"/PSI/SLS/CSAXS", + "creationLocation":"/PSI/SLS/CSAXS/SWISSFEL", "sourceFolder": "/tmp/gnome", "owner": "Andreas Menzel", "type": "raw", From 0793a80738dba73e1ad0afcc430afdfb19530d41 Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" <3798865+kavir1698@users.noreply.github.com> Date: Fri, 22 Mar 2024 22:34:26 +0100 Subject: [PATCH 5/8] Check illegal characters --- datasetIngestor/checkMetadata.go | 45 ++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/datasetIngestor/checkMetadata.go b/datasetIngestor/checkMetadata.go index d9fc553..003a72d 100644 --- a/datasetIngestor/checkMetadata.go +++ b/datasetIngestor/checkMetadata.go @@ -64,6 +64,11 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u metaDataMap = metadataObj.(map[string]interface{}) beamlineAccount = false + // Check scientificMetadata for illegal keys + if checkIllegalKeys(metaDataMap) { + log.Fatal("Metadata contains keys with illegal characters (., [], or <>).") + } + if user["displayName"] != "ingestor" { if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { validOwner := false @@ -259,3 +264,43 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u return metaDataMap, sourceFolder, beamlineAccount } + +// checkIllegalKeys checks whether the metadata contains illegal keys +func checkIllegalKeys(metadata map[string]interface{}) bool { + for _, item := range metadata { + // Use a type switch to check if the item is a slice of interface{} type, e.g. the scientificMetadata key. + switch v := item.(type) { + case []interface{}: + // If the item is a slice of interface{}, iterate over each subItem in the slice. + for _, subItem := range v { + if subMap, ok := subItem.(map[string]interface{}); ok { + if containsIllegalKeys(subMap) { + return true + } + } + } + } + } + return false +} + +// containsIllegalKeys checks whether the key contains illegal characters +func containsIllegalKeys(m map[string]interface{}) bool { + for k := range m { + if containsIllegalCharacters(k) { + return true + } + } + return false +} + +func containsIllegalCharacters(s string) bool { + // Check if the string contains periods, brackets, or other illegal characters + // You can adjust this condition based on your specific requirements + for _, char := range s { + if char == '.' || char == '[' || char == ']' || char == '<' || char == '>' { + return true + } + } + return false +} \ No newline at end of file From d71a203f14f9ed7538e06e19b735c7f22964231d Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" Date: Mon, 25 Mar 2024 12:28:14 +0100 Subject: [PATCH 6/8] Fix the tests --- datasetIngestor/checkMetadata.go | 54 +++--- datasetIngestor/checkMetadata_test.go | 159 ++++++++++++++++++ datasetIngestor/testdata/metadata-short.json | 8 + datasetIngestor/testdata/metadata.json | 20 +++ .../testdata/metadata_illegal.json | 20 +++ 5 files changed, 235 insertions(+), 26 deletions(-) create mode 100644 datasetIngestor/checkMetadata_test.go create mode 100644 datasetIngestor/testdata/metadata-short.json create mode 100644 datasetIngestor/testdata/metadata.json create mode 100644 datasetIngestor/testdata/metadata_illegal.json diff --git a/datasetIngestor/checkMetadata.go b/datasetIngestor/checkMetadata.go index 003a72d..a06742f 100644 --- a/datasetIngestor/checkMetadata.go +++ b/datasetIngestor/checkMetadata.go @@ -2,8 +2,8 @@ package datasetIngestor import ( "bytes" - "github.com/paulscherrerinstitute/scicat/datasetUtils" "encoding/json" + "github.com/paulscherrerinstitute/scicat/datasetUtils" "io/ioutil" "log" "net" @@ -60,17 +60,19 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u if err != nil { log.Fatal(err) } - // use type assertion to access f's underlying map - metaDataMap = metadataObj.(map[string]interface{}) + + // Use type assertion to convert the interface{} object to a map[string]interface{}. + metaDataMap = metadataObj.(map[string]interface{}) // `.(` is type assertion: a way to extract the underlying value of an interface and check whether it's of a specific type. beamlineAccount = false - // Check scientificMetadata for illegal keys + // Check scientificMetadata for illegal keys if checkIllegalKeys(metaDataMap) { - log.Fatal("Metadata contains keys with illegal characters (., [], or <>).") + panic("Metadata contains keys with illegal characters (., [], $, or <>).") } if user["displayName"] != "ingestor" { - if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { + // Check if the metadata contains the "ownerGroup" key. + if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { // type assertion with a comma-ok idiom validOwner := false for _, b := range accessGroups { if b == ownerGroup { @@ -219,6 +221,9 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u //fmt.Printf("Marshalled meta data : %s\n", string(bmm)) // now check validity req, err := http.NewRequest("POST", myurl, bytes.NewBuffer(bmm)) + if err != nil { + log.Fatal(err) + } req.Header.Set("Content-Type", "application/json") resp, err := client.Do(req) @@ -265,18 +270,25 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u } -// checkIllegalKeys checks whether the metadata contains illegal keys func checkIllegalKeys(metadata map[string]interface{}) bool { - for _, item := range metadata { - // Use a type switch to check if the item is a slice of interface{} type, e.g. the scientificMetadata key. - switch v := item.(type) { + for key, value := range metadata { + if containsIllegalCharacters(key) { + return true + } + + switch v := value.(type) { + case map[string]interface{}: + if checkIllegalKeys(v) { + return true + } case []interface{}: - // If the item is a slice of interface{}, iterate over each subItem in the slice. - for _, subItem := range v { - if subMap, ok := subItem.(map[string]interface{}); ok { - if containsIllegalKeys(subMap) { + for _, item := range v { + switch itemValue := item.(type) { // Type switch on array item + case map[string]interface{}: + if checkIllegalKeys(itemValue) { return true } + // Add other cases if needed } } } @@ -284,23 +296,13 @@ func checkIllegalKeys(metadata map[string]interface{}) bool { return false } -// containsIllegalKeys checks whether the key contains illegal characters -func containsIllegalKeys(m map[string]interface{}) bool { - for k := range m { - if containsIllegalCharacters(k) { - return true - } - } - return false -} - func containsIllegalCharacters(s string) bool { // Check if the string contains periods, brackets, or other illegal characters // You can adjust this condition based on your specific requirements for _, char := range s { - if char == '.' || char == '[' || char == ']' || char == '<' || char == '>' { + if char == '.' || char == '[' || char == ']' || char == '<' || char == '>' || char == '$' { return true } } return false -} \ No newline at end of file +} diff --git a/datasetIngestor/checkMetadata_test.go b/datasetIngestor/checkMetadata_test.go new file mode 100644 index 0000000..5e86d8d --- /dev/null +++ b/datasetIngestor/checkMetadata_test.go @@ -0,0 +1,159 @@ +package datasetIngestor + +import ( + "net/http" + "reflect" + "testing" + "time" +) + +func TestGetHost(t *testing.T) { + // Call the function under test. + host := GetHost() + + // fail the test and report an error if the returned hostname is an empty string. + if len(host) == 0 { + t.Errorf("getHost() returned an empty string") + } + + // fail the test and report an error if the returned hostname is "unknown". + if host == "unknown" { + t.Errorf("getHost() was unable to get the hostname") + } +} + +func TestCheckMetadata(t *testing.T) { + // Define mock parameters for the function + var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" // "https://example.com/api" + var APIServer = TEST_API_SERVER + var metadatafile1 = "testdata/metadata.json" + var metadatafile2 = "testdata/metadata-short.json" + // var metadatafile3 = "testdata/metadata_illegal.json" + + // Mock HTTP client + client := &http.Client{ + Timeout: 5 * time.Second, // Set a timeout for requests + Transport: &http.Transport{ + // Customize the transport settings if needed (e.g., proxy, TLS config) + // For a dummy client, default settings are usually sufficient + }, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + // Customize how redirects are handled if needed + // For a dummy client, default behavior is usually sufficient + return http.ErrUseLastResponse // Use the last response for redirects + }, + } + + // Mock user map + user := map[string]string{ + "displayName": "csaxsswissfel", + "mail": "testuser@example.com", + } + + // Mock access groups + accessGroups := []string{"group1", "group2"} + + // Call the function with mock parameters + metaDataMap, sourceFolder, beamlineAccount := CheckMetadata(client, APIServer, metadatafile1, user, accessGroups) + + // Add assertions here based on the expected behavior of the function + if len(metaDataMap) == 0 { + t.Error("Expected non-empty metadata map") + } + if sourceFolder == "" { + t.Error("Expected non-empty source folder") + } + if sourceFolder != "/usr/share/gnome" { + t.Error("sourceFolder should be '/usr/share/gnome'") + } + if reflect.TypeOf(beamlineAccount).Kind() != reflect.Bool { + t.Error("Expected beamlineAccount to be boolean") + } + if beamlineAccount != false { + t.Error("Expected beamlineAccount to be false") + } + if _, ok := metaDataMap["ownerEmail"]; !ok { + t.Error("metaDataMap missing required key 'ownerEmail'") + } + if _, ok := metaDataMap["principalInvestigator"]; !ok { + t.Error("metaDataMap missing required key 'principalInvestigator'") + } + if _, ok := metaDataMap["scientificMetadata"]; !ok { + t.Error("metaDataMap missing required key 'scientificMetadata'") + } + scientificMetadata, ok := metaDataMap["scientificMetadata"].([]interface{}) + if ok { + firstEntry := scientificMetadata[0].(map[string]interface{}) + sample, ok := firstEntry["sample"].(map[string]interface{}) + if ok { + if _, ok := sample["name"]; !ok { + t.Error("Sample is missing 'name' field") + } + if _, ok := sample["description"]; !ok { + t.Error("Sample is missing 'description' field") + } + } + } else { + t.Error("scientificMetadata is not a list") + } + + // test with the second metadata file + metaDataMap2, sourceFolder2, beamlineAccount2 := CheckMetadata(client, APIServer, metadatafile2, user, accessGroups) + + // Add assertions here based on the expected behavior of the function + if len(metaDataMap2) == 0 { + t.Error("Expected non-empty metadata map") + } + if sourceFolder2 == "" { + t.Error("Expected non-empty source folder") + } + if sourceFolder2 != "/tmp/gnome" { + t.Error("sourceFolder should be '/tmp/gnome'") + } + if reflect.TypeOf(beamlineAccount2).Kind() != reflect.Bool { + t.Error("Expected beamlineAccount to be boolean") + } + if beamlineAccount2 != false { + t.Error("Expected beamlineAccount to be false") + } +} + +func TestCheckMetadata_CrashCase(t *testing.T) { + defer func() { + if recover() != nil { + t.Log("Function crashed as expected") + } else { + t.Fatal("Function did not crash as expected") + } + }() + + // Define mock parameters for the function + var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" // "https://example.com/api" + var APIServer = TEST_API_SERVER + var metadatafile3 = "testdata/metadata_illegal.json" + + // Mock HTTP client + client := &http.Client{ + Timeout: 5 * time.Second, // Set a timeout for requests + Transport: &http.Transport{ + // Customize the transport settings if needed (e.g., proxy, TLS config) + // For a dummy client, default settings are usually sufficient + }, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + // Customize how redirects are handled if needed + // For a dummy client, default behavior is usually sufficient + return http.ErrUseLastResponse // Use the last response for redirects + }, + } + + // Mock user map + user := map[string]string{ + "displayName": "csaxsswissfel", + "mail": "testuser@example.com", + } + + // Mock access groups + accessGroups := []string{"group1", "group2"} + // Call the function that should crash + CheckMetadata(client, APIServer, metadatafile3, user, accessGroups) +} diff --git a/datasetIngestor/testdata/metadata-short.json b/datasetIngestor/testdata/metadata-short.json new file mode 100644 index 0000000..fbe4a5b --- /dev/null +++ b/datasetIngestor/testdata/metadata-short.json @@ -0,0 +1,8 @@ +{ + "principalInvestigator":"egon.meier@psi.ch", + "creationLocation":"/PSI/SLS/CSAXS/SWISSFEL", + "sourceFolder": "/tmp/gnome", + "owner": "first last", + "type": "raw", + "ownerGroup": "group1" +} diff --git a/datasetIngestor/testdata/metadata.json b/datasetIngestor/testdata/metadata.json new file mode 100644 index 0000000..303c22d --- /dev/null +++ b/datasetIngestor/testdata/metadata.json @@ -0,0 +1,20 @@ +{ + "creationLocation": "/PSI/SLS/CSAXS/SWISSFEL", + "datasetName": "CMakeCache", + "description": "", + "owner": "first last", + "ownerEmail": "test@example.com", + "ownerGroup": "group1", + "principalInvestigator": "test@example.com", + "scientificMetadata": [ + { + "sample": { + "description": "", + "name": "", + "principalInvestigator": "" + } + } + ], + "sourceFolder": "/usr/share/gnome", + "type": "raw" +} diff --git a/datasetIngestor/testdata/metadata_illegal.json b/datasetIngestor/testdata/metadata_illegal.json new file mode 100644 index 0000000..accae75 --- /dev/null +++ b/datasetIngestor/testdata/metadata_illegal.json @@ -0,0 +1,20 @@ +{ + "creationLocation": "/PSI/SLS/CSAXS", + "datasetName": "CMakeCache", + "description": "", + "owner": "first last", + "ownerEmail": "test@example.com", + "ownerGroup": "group1", + "principalInvestigator": "test@example.com", + "scientificMetadata": [ + { + "sample": { + "description.": "It has an illegal characters in the key", + "name]": "same", + "principalInvestigator": "" + } + } + ], + "sourceFolder": "/usr/share/gnome", + "type": "raw" +} From e0da28d8f353bbc8cefbf8de539c25c510b3fa5d Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" Date: Mon, 25 Mar 2024 13:29:52 +0100 Subject: [PATCH 7/8] Refactor checkMetadata --- cmd/datasetIngestor/main.go | 5 +- datasetIngestor/checkMetadata.go | 418 ++++++++++++++------------ datasetIngestor/checkMetadata_test.go | 57 +--- 3 files changed, 250 insertions(+), 230 deletions(-) diff --git a/cmd/datasetIngestor/main.go b/cmd/datasetIngestor/main.go index 07f684b..ae5af47 100644 --- a/cmd/datasetIngestor/main.go +++ b/cmd/datasetIngestor/main.go @@ -203,7 +203,10 @@ func main() { /* TODO Add info about policy settings and that autoarchive will take place or not */ - metaDataMap, sourceFolder, beamlineAccount := datasetIngestor.CheckMetadata(client, APIServer, metadatafile, user, accessGroups) + metaDataMap, sourceFolder, beamlineAccount, err := datasetIngestor.CheckMetadata(client, APIServer, metadatafile, user, accessGroups) + if err != nil { + log.Fatal("Error in CheckMetadata function: ", err) + } //log.Printf("metadata object: %v\n", metaDataMap) // assemble list of folders (=datasets) to created diff --git a/datasetIngestor/checkMetadata.go b/datasetIngestor/checkMetadata.go index 75d2a02..e78ac6d 100644 --- a/datasetIngestor/checkMetadata.go +++ b/datasetIngestor/checkMetadata.go @@ -11,55 +11,58 @@ import ( "os" "path/filepath" "strings" - + "errors" "github.com/fatih/color" + "fmt" ) -const DUMMY_TIME = "2300-01-01T11:11:11.000Z" -const DUMMY_OWNER = "x12345" +const ( + ErrIllegalKeys = "metadata contains keys with illegal characters (., [], $, or <>)" + DUMMY_TIME = "2300-01-01T11:11:11.000Z" + DUMMY_OWNER = "x12345" +) -// getHost is a function that attempts to retrieve and return the fully qualified domain name (FQDN) of the current host. -// If it encounters any error during the process, it gracefully falls back to returning the simple hostname or "unknown". -func getHost() string { - // Try to get the hostname of the current machine. - hostname, err := os.Hostname() + +func CheckMetadata(client *http.Client, APIServer string, metadatafile string, user map[string]string, accessGroups []string) (metaDataMap map[string]interface{}, sourceFolder string, beamlineAccount bool, err error) { + metaDataMap, err = readMetadataFromFile(metadatafile) if err != nil { - return "unknown" + return nil, "", false, err } - addrs, err := net.LookupIP(hostname) + if checkIllegalKeys(metaDataMap) { + return nil, "", false, errors.New(ErrIllegalKeys) + } + + beamlineAccount, err = checkUserAndOwnerGroup(user, accessGroups, metaDataMap) if err != nil { - return hostname + return nil, "", false, err } - for _, addr := range addrs { - if ipv4 := addr.To4(); ipv4 != nil { - ip, err := ipv4.MarshalText() - if err != nil { - return hostname - } - hosts, err := net.LookupAddr(string(ip)) - if err != nil || len(hosts) == 0 { - return hostname - } - fqdn := hosts[0] - return strings.TrimSuffix(fqdn, ".") // return fqdn without trailing dot - } + err = augmentMissingMetadata(user, metaDataMap, client, APIServer, accessGroups) + if err != nil { + return nil, "", false, err } - return hostname + + err = checkMetadataValidity(client, APIServer, metaDataMap, metaDataMap["type"].(string)) + if err != nil { + return nil, "", false, err + } + + sourceFolder, err = getSourceFolder(metaDataMap) + if err != nil { + return nil, "", false, err + } + + return metaDataMap, sourceFolder, beamlineAccount, nil } -// CheckMetadata is a function that validates and augments metadata for a dataset. -// It takes an HTTP client, an API server URL, a metadata file path, a user map, and a list of access groups as input. -// It returns a map of metadata, a source folder string, and a boolean indicating whether the dataset belongs to a beamline account. -func CheckMetadata(client *http.Client, APIServer string, metadatafile string, user map[string]string, accessGroups []string) (metaDataMap map[string]interface{}, sourceFolder string, beamlineAccount bool) { - // Read the full metadata from the file. +// readMetadataFromFile reads the metadata from the file and unmarshals it into a map. +func readMetadataFromFile(metadatafile string) (map[string]interface{}, error) { b, err := ioutil.ReadFile(metadatafile) // just pass the file name if err != nil { log.Fatal(err) } - // Unmarshal the JSON metadata into an interface{} object. var metadataObj interface{} // Using interface{} allows metadataObj to hold any type of data, since it has no defined methods. err = json.Unmarshal(b, &metadataObj) @@ -68,14 +71,52 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u } // Use type assertion to convert the interface{} object to a map[string]interface{}. - metaDataMap = metadataObj.(map[string]interface{}) // `.(` is type assertion: a way to extract the underlying value of an interface and check whether it's of a specific type. - beamlineAccount = false + metaDataMap := metadataObj.(map[string]interface{}) // `.(` is type assertion: a way to extract the underlying value of an interface and check whether it's of a specific type. + return metaDataMap, err +} - // Check scientificMetadata for illegal keys - if checkIllegalKeys(metaDataMap) { - panic("Metadata contains keys with illegal characters (., [], $, or <>).") +func checkIllegalKeys(metadata map[string]interface{}) bool { + for key, value := range metadata { + if containsIllegalCharacters(key) { + return true + } + + switch v := value.(type) { + case map[string]interface{}: + if checkIllegalKeys(v) { + return true + } + case []interface{}: + for _, item := range v { + switch itemValue := item.(type) { // Type switch on array item + case map[string]interface{}: + if checkIllegalKeys(itemValue) { + return true + } + // Add other cases if needed + } + } + } + } + return false +} + +func containsIllegalCharacters(s string) bool { + // Check if the string contains periods, brackets, or other illegal characters + // You can adjust this condition based on your specific requirements + for _, char := range s { + if char == '.' || char == '[' || char == ']' || char == '<' || char == '>' || char == '$' { + return true + } } + return false +} +// checkUserAndOwnerGroup checks the user and owner group and returns whether the user is a beamline account. +// checkUserAndOwnerGroup checks the user and owner group and returns whether the user is a beamline account. +func checkUserAndOwnerGroup(user map[string]string, accessGroups []string, metaDataMap map[string]interface{}) (bool, error) { + beamlineAccount := false + if user["displayName"] != "ingestor" { // Check if the metadata contains the "ownerGroup" key. if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { // type assertion with a comma-ok idiom @@ -87,8 +128,8 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u break } } - if validOwner { - log.Printf("OwnerGroup information %s verified successfully.\n", ownerGroup) + if validOwner { + log.Printf("OwnerGroup information %s verified successfully.\n", ownerGroup) } else { // If the owner group is not valid, check for beamline-specific accounts. if creationLocation, ok := metaDataMap["creationLocation"]; ok { @@ -102,27 +143,62 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u if user["displayName"] == expectedAccount { log.Printf("Beamline specific dataset %s - ingest granted.\n", expectedAccount) beamlineAccount = true - } else { - log.Fatalf("You are neither member of the ownerGroup %s nor the needed beamline account %s", ownerGroup, expectedAccount) - } - } else { - // for other data just check user name - // this is a quick and dirty test. Should be replaced by test for "globalaccess" role. TODO - // facilities: ["SLS", "SINQ", "SWISSFEL", "SmuS"], - u := user["displayName"] - if strings.HasPrefix(u, "sls") || - strings.HasPrefix(u, "swissfel") || - strings.HasPrefix(u, "sinq") || - strings.HasPrefix(u, "smus") { - beamlineAccount = true + } else { + return false, fmt.Errorf("you are neither member of the ownerGroup %s nor the needed beamline account %s", ownerGroup, expectedAccount) + } + } else { + // for other data just check user name + // this is a quick and dirty test. Should be replaced by test for "globalaccess" role. TODO + // facilities: ["SLS", "SINQ", "SWISSFEL", "SmuS"], + u := user["displayName"] + if strings.HasPrefix(u, "sls") || + strings.HasPrefix(u, "swissfel") || + strings.HasPrefix(u, "sinq") || + strings.HasPrefix(u, "smus") { + beamlineAccount = true + } + } } } } - } + + return beamlineAccount, nil +} + +// getHost is a function that attempts to retrieve and return the fully qualified domain name (FQDN) of the current host. +// If it encounters any error during the process, it gracefully falls back to returning the simple hostname or "unknown". +func getHost() string { + // Try to get the hostname of the current machine. + hostname, err := os.Hostname() + if err != nil { + return "unknown" } - // Check if ownerGroup is in accessGroups list + addrs, err := net.LookupIP(hostname) + if err != nil { + return hostname + } + + for _, addr := range addrs { + if ipv4 := addr.To4(); ipv4 != nil { + ip, err := ipv4.MarshalText() + if err != nil { + return hostname + } + hosts, err := net.LookupAddr(string(ip)) + if err != nil || len(hosts) == 0 { + return hostname + } + fqdn := hosts[0] + return strings.TrimSuffix(fqdn, ".") // return fqdn without trailing dot + } + } + return hostname +} +// augmentMissingMetadata augments missing metadata fields. +func augmentMissingMetadata(user map[string]string, metaDataMap map[string]interface{}, client *http.Client, APIServer string, accessGroups []string) error { + color.Set(color.FgGreen) // optionally augment missing owner metadata if _, ok := metaDataMap["owner"]; !ok { @@ -149,169 +225,141 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u } // for raw data add PI if missing if val, ok := metaDataMap["type"]; ok { - dstype := val.(string) + dstype, ok := val.(string) + if !ok { + return fmt.Errorf("type is not a string") + } if dstype == "raw" { if _, ok := metaDataMap["principalInvestigator"]; !ok { val, ok := metaDataMap["ownerGroup"] if ok { - ownerGroup := val.(string) + ownerGroup, ok := val.(string) + if !ok { + return fmt.Errorf("ownerGroup is not a string") + } proposal := datasetUtils.GetProposal(client, APIServer, ownerGroup, user, accessGroups) if val, ok := proposal["pi_email"]; ok { - metaDataMap["principalInvestigator"] = val.(string) + piEmail, ok := val.(string) + if !ok { + return fmt.Errorf("pi_email is not a string") + } + metaDataMap["principalInvestigator"] = piEmail log.Printf("principalInvestigator field added: %s", metaDataMap["principalInvestigator"]) - } else { - color.Set(color.FgRed) - log.Printf("principalInvestigator field missing for raw data and could not be added from proposal data.") - log.Printf("Please add the field explicitly to metadata file") - color.Unset() + } else { + log.Printf("principalInvestigator field missing for raw data and could not be added from proposal data.") + log.Printf("Please add the field explicitly to metadata file") + } } } } } - } - - color.Unset() - var bmm []byte - if val, ok := metaDataMap["type"]; ok { - dstype := val.(string) - // fmt.Println(errm,sourceFolder) + return nil +} - // verify data structure of meta data by calling isValid API for Dataset +// checkMetadataValidity checks the validity of the metadata by calling the appropriate API. +func checkMetadataValidity(client *http.Client, APIServer string, metaDataMap map[string]interface{}, dstype string) error { + myurl := "" + switch dstype { + case "raw": + myurl = APIServer + "/RawDatasets/isValid" + case "derived": + myurl = APIServer + "/DerivedDatasets/isValid" + case "base": + myurl = APIServer + "/Datasets/isValid" + default: + return fmt.Errorf("unknown dataset type encountered: %s", dstype) + } - myurl := "" - if dstype == "raw" { - myurl = APIServer + "/RawDatasets/isValid" - } else if dstype == "derived" { - myurl = APIServer + "/DerivedDatasets/isValid" - } else if dstype == "base" { - myurl = APIServer + "/Datasets/isValid" - } else { - log.Fatal("Unknown dataset type encountered:", dstype) - } + // add dummy data for fields which can only be filled after file scan to pass the validity test - // add dummy data for fields which can only be filled after file scan to pass the validity test - - if _, exists := metaDataMap["ownerGroup"]; !exists { - metaDataMap["ownerGroup"] = DUMMY_OWNER - } - if _, exists := metaDataMap["creationTime"]; !exists { - metaDataMap["creationTime"] = DUMMY_TIME - } - if metaDataMap["type"] == "raw" { - if _, exists := metaDataMap["endTime"]; !exists { - metaDataMap["endTime"] = DUMMY_TIME - } + if _, exists := metaDataMap["ownerGroup"]; !exists { + metaDataMap["ownerGroup"] = DUMMY_OWNER + } + if _, exists := metaDataMap["creationTime"]; !exists { + metaDataMap["creationTime"] = DUMMY_TIME + } + if metaDataMap["type"] == "raw" { + if _, exists := metaDataMap["endTime"]; !exists { + metaDataMap["endTime"] = DUMMY_TIME } + } - // add accessGroups entry for beamline if creationLocation is defined + // add accessGroups entry for beamline if creationLocation is defined - if value, exists := metaDataMap["creationLocation"]; exists { - var parts = strings.Split(value.(string), "/") - var groups []string - if len(parts) == 4 { - newGroup := strings.ToLower(parts[2]) + strings.ToLower(parts[3]) + if value, exists := metaDataMap["creationLocation"]; exists { + var parts = strings.Split(value.(string), "/") + var groups []string + if len(parts) == 4 { + newGroup := strings.ToLower(parts[2]) + strings.ToLower(parts[3]) - if ag, exists := metaDataMap["accessGroups"]; exists { - // a direct typecast does not work, this loop is needed - aInterface := ag.([]interface{}) - aString := make([]string, len(aInterface)) - for i, v := range aInterface { - aString[i] = v.(string) - } - groups = append(aString, newGroup) - } else { - groups = append(groups, newGroup) + if ag, exists := metaDataMap["accessGroups"]; exists { + // a direct typecast does not work, this loop is needed + aInterface := ag.([]interface{}) + aString := make([]string, len(aInterface)) + for i, v := range aInterface { + aString[i] = v.(string) } + groups = append(aString, newGroup) + } else { + groups = append(groups, newGroup) } - metaDataMap["accessGroups"] = groups } + metaDataMap["accessGroups"] = groups + } - bmm, _ = json.Marshal(metaDataMap) - //fmt.Printf("Marshalled meta data : %s\n", string(bmm)) - // now check validity - req, err := http.NewRequest("POST", myurl, bytes.NewBuffer(bmm)) - if err != nil { - log.Fatal(err) - } - req.Header.Set("Content-Type", "application/json") - resp, err := client.Do(req) - if err != nil { - log.Fatal(err) - } - defer resp.Body.Close() + bmm, err := json.Marshal(metaDataMap) + if err != nil { + return err + } - body, _ := ioutil.ReadAll(resp.Body) + req, err := http.NewRequest("POST", myurl, bytes.NewBuffer(bmm)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") - // check validity - var respObj interface{} - err = json.Unmarshal(body, &respObj) - if err != nil { - log.Fatal(err) - } - respMap := respObj.(map[string]interface{}) - if respMap["valid"] != true { - log.Fatal("response Body:", string(body)) - } - } else { - log.Fatal("Undefined type field") + resp, err := client.Do(req) + if err != nil { + return err } + defer resp.Body.Close() - sourceFolder = "" - if val, ok := metaDataMap["sourceFolder"]; ok { - // turn sourceFolder into canonical form but only for online data /sls/BL/data form - sourceFolder = val.(string) - var parts = strings.Split(val.(string), "/") - if len(parts) > 3 && parts[3] == "data" && parts[1] == "sls" { - sourceFolder, err = filepath.EvalSymlinks(val.(string)) - if err != nil { - log.Fatalf("Failed to find canonical form of sourceFolder:%v %v", val, err) - } - color.Set(color.FgYellow) - log.Printf("Transform sourceFolder %v to canonical form: %v", val, sourceFolder) - color.Unset() - metaDataMap["sourceFolder"] = sourceFolder - } - } else { - log.Fatal("Undefined sourceFolder field") + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("unexpected status code: %d", resp.StatusCode) } - return metaDataMap, sourceFolder, beamlineAccount + _, err = ioutil.ReadAll(resp.Body) + if err != nil { + return err + } + + return nil } -func checkIllegalKeys(metadata map[string]interface{}) bool { - for key, value := range metadata { - if containsIllegalCharacters(key) { - return true - } +// getSourceFolder gets the source folder from the metadata. +func getSourceFolder(metaDataMap map[string]interface{}) (string, error) { + sourceFolder := "" + val, ok := metaDataMap["sourceFolder"] + if !ok { + return "", errors.New("undefined sourceFolder field") + } - switch v := value.(type) { - case map[string]interface{}: - if checkIllegalKeys(v) { - return true - } - case []interface{}: - for _, item := range v { - switch itemValue := item.(type) { // Type switch on array item - case map[string]interface{}: - if checkIllegalKeys(itemValue) { - return true - } - // Add other cases if needed - } - } - } - } - return false -} + sourceFolder, ok = val.(string) + if !ok { + return "", errors.New("sourceFolder is not a string") + } -func containsIllegalCharacters(s string) bool { - // Check if the string contains periods, brackets, or other illegal characters - // You can adjust this condition based on your specific requirements - for _, char := range s { - if char == '.' || char == '[' || char == ']' || char == '<' || char == '>' || char == '$' { - return true - } - } - return false + parts := strings.Split(sourceFolder, "/") + if len(parts) > 3 && parts[3] == "data" && parts[1] == "sls" { + var err error + sourceFolder, err = filepath.EvalSymlinks(sourceFolder) + if err != nil { + return "", fmt.Errorf("failed to find canonical form of sourceFolder:%v %v", sourceFolder, err) + } + log.Printf("Transform sourceFolder %v to canonical form: %v", val, sourceFolder) + metaDataMap["sourceFolder"] = sourceFolder + } + + return sourceFolder, nil } diff --git a/datasetIngestor/checkMetadata_test.go b/datasetIngestor/checkMetadata_test.go index 5e86d8d..ee71aca 100644 --- a/datasetIngestor/checkMetadata_test.go +++ b/datasetIngestor/checkMetadata_test.go @@ -28,7 +28,7 @@ func TestCheckMetadata(t *testing.T) { var APIServer = TEST_API_SERVER var metadatafile1 = "testdata/metadata.json" var metadatafile2 = "testdata/metadata-short.json" - // var metadatafile3 = "testdata/metadata_illegal.json" + var metadatafile3 = "testdata/metadata_illegal.json" // Mock HTTP client client := &http.Client{ @@ -54,7 +54,10 @@ func TestCheckMetadata(t *testing.T) { accessGroups := []string{"group1", "group2"} // Call the function with mock parameters - metaDataMap, sourceFolder, beamlineAccount := CheckMetadata(client, APIServer, metadatafile1, user, accessGroups) + metaDataMap, sourceFolder, beamlineAccount, err := CheckMetadata(client, APIServer, metadatafile1, user, accessGroups) + if err != nil { + t.Error("Error in CheckMetadata function: ", err) + } // Add assertions here based on the expected behavior of the function if len(metaDataMap) == 0 { @@ -98,7 +101,10 @@ func TestCheckMetadata(t *testing.T) { } // test with the second metadata file - metaDataMap2, sourceFolder2, beamlineAccount2 := CheckMetadata(client, APIServer, metadatafile2, user, accessGroups) + metaDataMap2, sourceFolder2, beamlineAccount2, err := CheckMetadata(client, APIServer, metadatafile2, user, accessGroups) + if err != nil { + t.Error("Error in CheckMetadata function: ", err) + } // Add assertions here based on the expected behavior of the function if len(metaDataMap2) == 0 { @@ -107,53 +113,16 @@ func TestCheckMetadata(t *testing.T) { if sourceFolder2 == "" { t.Error("Expected non-empty source folder") } - if sourceFolder2 != "/tmp/gnome" { - t.Error("sourceFolder should be '/tmp/gnome'") - } if reflect.TypeOf(beamlineAccount2).Kind() != reflect.Bool { t.Error("Expected beamlineAccount to be boolean") } if beamlineAccount2 != false { t.Error("Expected beamlineAccount to be false") } -} - -func TestCheckMetadata_CrashCase(t *testing.T) { - defer func() { - if recover() != nil { - t.Log("Function crashed as expected") - } else { - t.Fatal("Function did not crash as expected") - } - }() - - // Define mock parameters for the function - var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" // "https://example.com/api" - var APIServer = TEST_API_SERVER - var metadatafile3 = "testdata/metadata_illegal.json" - // Mock HTTP client - client := &http.Client{ - Timeout: 5 * time.Second, // Set a timeout for requests - Transport: &http.Transport{ - // Customize the transport settings if needed (e.g., proxy, TLS config) - // For a dummy client, default settings are usually sufficient - }, - CheckRedirect: func(req *http.Request, via []*http.Request) error { - // Customize how redirects are handled if needed - // For a dummy client, default behavior is usually sufficient - return http.ErrUseLastResponse // Use the last response for redirects - }, + // test with the third metadata file + _, _, _, err = CheckMetadata(client, APIServer, metadatafile3, user, accessGroups) + if err.Error() != ErrIllegalKeys { + t.Errorf("Expected error in CheckMetadata function: %s, got: %v", ErrIllegalKeys, err) } - - // Mock user map - user := map[string]string{ - "displayName": "csaxsswissfel", - "mail": "testuser@example.com", - } - - // Mock access groups - accessGroups := []string{"group1", "group2"} - // Call the function that should crash - CheckMetadata(client, APIServer, metadatafile3, user, accessGroups) } From cc0e70ad3e9bb389bf983aa392aeaaf3ce1a842c Mon Sep 17 00:00:00 2001 From: "Ali R. Vahdati" <3798865+kavir1698@users.noreply.github.com> Date: Wed, 27 Mar 2024 12:26:49 +0100 Subject: [PATCH 8/8] Update TestCheckMetadata_CrashCase Rename checkIllegalKeys to containsIllegalKeys --- datasetIngestor/checkMetadata.go | 51 +++--------------- datasetIngestor/checkMetadata_test.go | 74 +++++++++++++-------------- 2 files changed, 43 insertions(+), 82 deletions(-) diff --git a/datasetIngestor/checkMetadata.go b/datasetIngestor/checkMetadata.go index 72aa8ed..c44b261 100644 --- a/datasetIngestor/checkMetadata.go +++ b/datasetIngestor/checkMetadata.go @@ -25,12 +25,11 @@ const ( func CheckMetadata(client *http.Client, APIServer string, metadatafile string, user map[string]string, accessGroups []string) (metaDataMap map[string]interface{}, sourceFolder string, beamlineAccount bool, err error) { metaDataMap, err = readMetadataFromFile(metadatafile) - if err != nil { return nil, "", false, err } - if checkIllegalKeys(metaDataMap) { + if containsIllegalKeys(metaDataMap) { return nil, "", false, errors.New(ErrIllegalKeys) } @@ -64,7 +63,6 @@ func readMetadataFromFile(metadatafile string) (map[string]interface{}, error) { if err != nil { log.Fatal(err) } - // Unmarshal the JSON metadata into an interface{} object. var metadataObj interface{} // Using interface{} allows metadataObj to hold any type of data, since it has no defined methods. err = json.Unmarshal(b, &metadataObj) @@ -77,7 +75,7 @@ func readMetadataFromFile(metadatafile string) (map[string]interface{}, error) { return metaDataMap, err } -func checkIllegalKeys(metadata map[string]interface{}) bool { +func containsIllegalKeys(metadata map[string]interface{}) bool { for key, value := range metadata { if containsIllegalCharacters(key) { return true @@ -85,14 +83,14 @@ func checkIllegalKeys(metadata map[string]interface{}) bool { switch v := value.(type) { case map[string]interface{}: - if checkIllegalKeys(v) { + if containsIllegalKeys(v) { return true } case []interface{}: for _, item := range v { switch itemValue := item.(type) { // Type switch on array item case map[string]interface{}: - if checkIllegalKeys(itemValue) { + if containsIllegalKeys(itemValue) { return true } // Add other cases if needed @@ -118,7 +116,7 @@ func containsIllegalCharacters(s string) bool { // checkUserAndOwnerGroup checks the user and owner group and returns whether the user is a beamline account. func checkUserAndOwnerGroup(user map[string]string, accessGroups []string, metaDataMap map[string]interface{}) (bool, error) { beamlineAccount := false - + if user["displayName"] != "ingestor" { // Check if the metadata contains the "ownerGroup" key. if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { // type assertion with a comma-ok idiom @@ -364,41 +362,4 @@ func getSourceFolder(metaDataMap map[string]interface{}) (string, error) { } return sourceFolder, nil -} - -func checkIllegalKeys(metadata map[string]interface{}) bool { - for key, value := range metadata { - if containsIllegalCharacters(key) { - return true - } - - switch v := value.(type) { - case map[string]interface{}: - if checkIllegalKeys(v) { - return true - } - case []interface{}: - for _, item := range v { - switch itemValue := item.(type) { // Type switch on array item - case map[string]interface{}: - if checkIllegalKeys(itemValue) { - return true - } - // Add other cases if needed - } - } - } - } - return false -} - -func containsIllegalCharacters(s string) bool { - // Check if the string contains periods, brackets, or other illegal characters - // You can adjust this condition based on your specific requirements - for _, char := range s { - if char == '.' || char == '[' || char == ']' || char == '<' || char == '>' || char == '$' { - return true - } - } - return false -} +} \ No newline at end of file diff --git a/datasetIngestor/checkMetadata_test.go b/datasetIngestor/checkMetadata_test.go index 9419541..a94a6d6 100644 --- a/datasetIngestor/checkMetadata_test.go +++ b/datasetIngestor/checkMetadata_test.go @@ -124,41 +124,41 @@ func TestCheckMetadata(t *testing.T) { } func TestCheckMetadata_CrashCase(t *testing.T) { - defer func() { - if recover() != nil { - t.Log("Function crashed as expected") - } else { - t.Fatal("Function did not crash as expected") - } - }() - - // Define mock parameters for the function - var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" - var APIServer = TEST_API_SERVER - var metadatafile3 = "testdata/metadata_illegal.json" - - // Mock HTTP client - client := &http.Client{ - Timeout: 5 * time.Second, // Set a timeout for requests - Transport: &http.Transport{ - // Customize the transport settings if needed (e.g., proxy, TLS config) - // For a dummy client, default settings are usually sufficient - }, - CheckRedirect: func(req *http.Request, via []*http.Request) error { - // Customize how redirects are handled if needed - // For a dummy client, default behavior is usually sufficient - return http.ErrUseLastResponse // Use the last response for redirects - }, - } - - // Mock user map - user := map[string]string{ - "displayName": "csaxsswissfel", - "mail": "testuser@example.com", - } - - // Mock access groups - accessGroups := []string{"group1", "group2"} - // Call the function that should crash - CheckMetadata(client, APIServer, metadatafile3, user, accessGroups) + // Define mock parameters for the function + var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" // TODO: Test Improvement. Change this to a mock server. At the moment, tests will fail if we change this to a mock server. + var APIServer = TEST_API_SERVER + var metadatafile3 = "testdata/metadata_illegal.json" + + // Mock HTTP client + client := &http.Client{ + Timeout: 5 * time.Second, // Set a timeout for requests + Transport: &http.Transport{ + // Customize the transport settings if needed (e.g., proxy, TLS config) + // For a dummy client, default settings are usually sufficient + }, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + // Customize how redirects are handled if needed + // For a dummy client, default behavior is usually sufficient + return http.ErrUseLastResponse // Use the last response for redirects + }, + } + + // Mock user map + user := map[string]string{ + "displayName": "csaxsswissfel", + "mail": "testuser@example.com", + } + + // Mock access groups + accessGroups := []string{"group1", "group2"} + + // Call the function that should return an error + _, _, _, err := CheckMetadata(client, APIServer, metadatafile3, user, accessGroups) + + // Check that the function returned the expected error + if err == nil { + t.Fatal("Function did not return an error as expected") + } else if err.Error() != ErrIllegalKeys { + t.Errorf("Expected error %q, got %q", ErrIllegalKeys, err.Error()) + } }