From 8d7c67fe0fcd61fcae389b3bde82616cd6b37d1b Mon Sep 17 00:00:00 2001 From: consolethinks Date: Tue, 23 Jul 2024 11:36:07 +0200 Subject: [PATCH] improve checkMetadata --- datasetIngestor/checkMetadata.go | 235 +++++++++++++++----------- datasetIngestor/checkMetadata_test.go | 84 ++++----- 2 files changed, 176 insertions(+), 143 deletions(-) diff --git a/datasetIngestor/checkMetadata.go b/datasetIngestor/checkMetadata.go index 0cac687..3a1f057 100644 --- a/datasetIngestor/checkMetadata.go +++ b/datasetIngestor/checkMetadata.go @@ -28,13 +28,12 @@ const raw = "raw" func CheckMetadata(client *http.Client, APIServer string, metadatafile string, user map[string]string, accessGroups []string) (metaDataMap map[string]interface{}, sourceFolder string, beamlineAccount bool, err error) { metaDataMap, err = readMetadataFromFile(metadatafile) - // TODO this error handler doesn't make sense for now considering the "readMetadataFromFile" directly shuts down execution, instead of returning an error. if err != nil { return nil, "", false, err } - if containsIllegalKeys(metaDataMap) { - return nil, "", false, errors.New(ErrIllegalKeys) + if keys := collectIllegalKeys(metaDataMap); len(keys) > 0 { + return nil, "", false, errors.New(ErrIllegalKeys + ": \"" + strings.Join(keys, "\", \"") + "\"") } beamlineAccount, err = checkUserAndOwnerGroup(user, accessGroups, metaDataMap) @@ -47,7 +46,7 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u return nil, "", false, err } - err = checkMetadataValidity(client, APIServer, metaDataMap, metaDataMap["type"].(string)) + err = checkMetadataValidity(client, APIServer, metaDataMap) if err != nil { return nil, "", false, err } @@ -64,15 +63,13 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u func readMetadataFromFile(metadatafile string) (map[string]interface{}, error) { b, err := os.ReadFile(metadatafile) // just pass the file name if err != nil { - // TODO convert this into return - log.Fatal(err) + return nil, err } // Unmarshal the JSON metadata into an interface{} object. var metadataObj interface{} // Using interface{} allows metadataObj to hold any type of data, since it has no defined methods. err = json.Unmarshal(b, &metadataObj) if err != nil { - // TODO convert this into return - log.Fatal(err) + return nil, err } // Use type assertion to convert the interface{} object to a map[string]interface{}. @@ -80,31 +77,35 @@ func readMetadataFromFile(metadatafile string) (map[string]interface{}, error) { return metaDataMap, err } -// NOTE the only keys that are invalid are the ones with illegal chars., uses recursion, doesn't return error info (eg. which key)... -func containsIllegalKeys(metadata map[string]interface{}) bool { - for key, value := range metadata { - if containsIllegalCharacters(key) { - return true - } +// collects keys with illegal characters +func collectIllegalKeys(metadata map[string]interface{}) []string { + stack := []map[string]interface{}{metadata} + keys := []string{} - switch v := value.(type) { - case map[string]interface{}: - if containsIllegalKeys(v) { - return true + for len(stack) > 0 { + item := stack[len(stack)-1] + stack = stack[:len(stack)-1] + + for key, value := range item { + if containsIllegalCharacters(key) { + keys = append(keys, key) } - case []interface{}: - for _, item := range v { - switch itemValue := item.(type) { // Type switch on array item - case map[string]interface{}: - if containsIllegalKeys(itemValue) { - return true + + switch v := value.(type) { + case map[string]interface{}: + stack = append(stack, v) + case []interface{}: + for _, vItem := range v { + // convert this to switch if other types are needed + if vMap, ok := vItem.(map[string]interface{}); ok { + stack = append(stack, vMap) } - // Add other cases if needed } } } } - return false + + return keys } func containsIllegalCharacters(s string) bool { @@ -120,54 +121,57 @@ func containsIllegalCharacters(s string) bool { // checkUserAndOwnerGroup checks the user and owner group and returns whether the user is a beamline account. func checkUserAndOwnerGroup(user map[string]string, accessGroups []string, metaDataMap map[string]interface{}) (bool, error) { - beamlineAccount := false - - if user["displayName"] != "ingestor" { - // Check if the metadata contains the "ownerGroup" key. - if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { // type assertion with a comma-ok idiom - validOwner := false - // Iterate over accessGroups to validate the owner group. - for _, b := range accessGroups { - if b == ownerGroup { - validOwner = true - break - } - } - if validOwner { - log.Printf("OwnerGroup information %s verified successfully.\n", ownerGroup) - } else { - // If the owner group is not valid, check for beamline-specific accounts. - if creationLocation, ok := metaDataMap["creationLocation"]; ok { - // Split the creationLocation string to extract beamline-specific information. - parts := strings.Split(creationLocation.(string), "/") - expectedAccount := "" - if len(parts) == 4 { - expectedAccount = strings.ToLower(parts[2]) + strings.ToLower(parts[3]) - } - // If the user matches the expected beamline account, grant ingest access. - if user["displayName"] == expectedAccount { - log.Printf("Beamline specific dataset %s - ingest granted.\n", expectedAccount) - beamlineAccount = true - } else { - return false, fmt.Errorf("you are neither member of the ownerGroup %s nor the needed beamline account %s", ownerGroup, expectedAccount) - } - } else { - // for other data just check user name - // this is a quick and dirty test. Should be replaced by test for "globalaccess" role. TODO - // facilities: ["SLS", "SINQ", "SWISSFEL", "SmuS"], - u := user["displayName"] - if strings.HasPrefix(u, "sls") || - strings.HasPrefix(u, "swissfel") || - strings.HasPrefix(u, "sinq") || - strings.HasPrefix(u, "smus") { - beamlineAccount = true - } - } - } + if user["displayName"] == "ingestor" { + return false, nil + } + + // Check if the metadata contains the "ownerGroup" key. + ownerGroup, ok := metaDataMap["ownerGroup"] + if !ok { + // NOTE: so if there's no ownergroup, we can pass this check? + return false, nil + } + + // Iterate over accessGroups to validate the owner group. + for _, b := range accessGroups { + if b == ownerGroup { + log.Printf("OwnerGroup information %s verified successfully.\n", ownerGroup) + return false, nil } } - return beamlineAccount, nil + // NOTE: beamline accounts seem to be very PSI specific. + // If the owner group is not valid, check for beamline-specific accounts. + if creationLocation, ok := metaDataMap["creationLocation"]; ok { + // Split the creationLocation string to extract beamline-specific information. + parts := strings.Split(creationLocation.(string), "/") + expectedAccount := "" + if len(parts) == 4 { + expectedAccount = strings.ToLower(parts[2]) + strings.ToLower(parts[3]) + } + // If the user matches the expected beamline account, grant ingest access. + if user["displayName"] == expectedAccount { + log.Printf("Beamline specific dataset %s - ingest granted.\n", expectedAccount) + return true, nil + } else { + return false, fmt.Errorf("you are neither member of the ownerGroup %s nor the needed beamline account %s", ownerGroup, expectedAccount) + } + } else { + // for other data just check user name + // this is a quick and dirty test. Should be replaced by test for "globalaccess" role. TODO + // facilities: ["SLS", "SINQ", "SWISSFEL", "SmuS"], + u := user["displayName"] + if strings.HasPrefix(u, "sls") || + strings.HasPrefix(u, "swissfel") || + strings.HasPrefix(u, "sinq") || + strings.HasPrefix(u, "smus") { + return true, nil + } + } + + // NOTE: we can get to this part after the last else, + // this lacks an error state for not being a beamline account or part of an expected owner group? + return false, nil } // getHost is a function that attempts to retrieve and return the fully qualified domain name (FQDN) of the current host. @@ -204,6 +208,8 @@ func getHost() string { // augmentMissingMetadata augments missing metadata fields. func augmentMissingMetadata(user map[string]string, metaDataMap map[string]interface{}, client *http.Client, APIServer string, accessGroups []string) error { color.Set(color.FgGreen) + defer color.Unset() + // optionally augment missing owner metadata if _, ok := metaDataMap["owner"]; !ok { metaDataMap["owner"] = user["displayName"] @@ -217,6 +223,7 @@ func augmentMissingMetadata(user map[string]string, metaDataMap map[string]inter metaDataMap["contactEmail"] = user["mail"] log.Printf("contactEmail field added: %s", metaDataMap["contactEmail"]) } + // and sourceFolderHost if _, ok := metaDataMap["sourceFolderHost"]; !ok { hostname := getHost() @@ -227,44 +234,69 @@ func augmentMissingMetadata(user map[string]string, metaDataMap map[string]inter log.Printf("sourceFolderHost field added: %s", metaDataMap["sourceFolderHost"]) } } + // for raw data add PI if missing - if val, ok := metaDataMap["type"]; ok { - dstype, ok := val.(string) + if err := addPrincipalInvestigatorFromProposal(user, metaDataMap, client, APIServer, accessGroups); err != nil { + return err + } + + return nil +} + +func addPrincipalInvestigatorFromProposal(user map[string]string, metaDataMap map[string]interface{}, client *http.Client, APIServer string, accessGroups []string) error { + typeVal, ok := metaDataMap["type"] + if !ok { + return nil + } + dstype, ok := typeVal.(string) + if !ok { + return fmt.Errorf("type is not a string") + } + if dstype != raw { + // exit if not raw + return nil + } + + if _, ok := metaDataMap["principalInvestigator"]; ok { + return nil + } + + val, ok := metaDataMap["ownerGroup"] + if !ok { + return nil + } + + ownerGroup, ok := val.(string) + if !ok { + return fmt.Errorf("ownerGroup is not a string") + } + + proposal, err := datasetUtils.GetProposal(client, APIServer, ownerGroup, user, accessGroups) + if err != nil { + return fmt.Errorf("error: %v", err) + } + + if val, ok := proposal["pi_email"]; ok { + piEmail, ok := val.(string) if !ok { - return fmt.Errorf("type is not a string") - } - if dstype == raw { - if _, ok := metaDataMap["principalInvestigator"]; !ok { - val, ok := metaDataMap["ownerGroup"] - if ok { - ownerGroup, ok := val.(string) - if !ok { - return fmt.Errorf("ownerGroup is not a string") - } - proposal, err := datasetUtils.GetProposal(client, APIServer, ownerGroup, user, accessGroups) - if err != nil { - log.Fatalf("Error: %v\n", err) - } - if val, ok := proposal["pi_email"]; ok { - piEmail, ok := val.(string) - if !ok { - return fmt.Errorf("pi_email is not a string") - } - metaDataMap["principalInvestigator"] = piEmail - log.Printf("principalInvestigator field added: %s", metaDataMap["principalInvestigator"]) - } else { - log.Printf("principalInvestigator field missing for raw data and could not be added from proposal data.") - log.Printf("Please add the field explicitly to metadata file") - } - } - } + return fmt.Errorf("pi_email is not a string") } + metaDataMap["principalInvestigator"] = piEmail + log.Printf("principalInvestigator field added: %s", metaDataMap["principalInvestigator"]) + } else { + log.Printf("principalInvestigator field missing for raw data and could not be added from proposal data.") + log.Printf("Please add the field explicitly to metadata file") } return nil } // checkMetadataValidity checks the validity of the metadata by calling the appropriate API. -func checkMetadataValidity(client *http.Client, APIServer string, metaDataMap map[string]interface{}, dstype string) error { +func checkMetadataValidity(client *http.Client, APIServer string, metaDataMap map[string]interface{}) error { + dstype, ok := metaDataMap["type"].(string) + if !ok { + return fmt.Errorf("metadata type isn't a string") + } + myurl := "" switch dstype { case raw: @@ -356,6 +388,7 @@ func getSourceFolder(metaDataMap map[string]interface{}) (string, error) { return "", errors.New("sourceFolder is not a string") } + // NOTE: this part seems very PSI specific parts := strings.Split(sourceFolder, "/") if len(parts) > 3 && parts[3] == "data" && parts[1] == "sls" { var err error diff --git a/datasetIngestor/checkMetadata_test.go b/datasetIngestor/checkMetadata_test.go index a94a6d6..c2440cb 100644 --- a/datasetIngestor/checkMetadata_test.go +++ b/datasetIngestor/checkMetadata_test.go @@ -2,9 +2,9 @@ package datasetIngestor import ( "net/http" + "reflect" "testing" "time" - "reflect" ) func TestGetHost(t *testing.T) { @@ -31,7 +31,7 @@ func TestCheckMetadata(t *testing.T) { // Mock HTTP client client := &http.Client{ - Timeout: 5 * time.Second, // Set a timeout for requests + Timeout: 5 * time.Second, // Set a timeout for requests Transport: &http.Transport{ // Customize the transport settings if needed (e.g., proxy, TLS config) // For a dummy client, default settings are usually sufficient @@ -75,13 +75,13 @@ func TestCheckMetadata(t *testing.T) { t.Error("Expected beamlineAccount to be false") } if _, ok := metaDataMap["ownerEmail"]; !ok { - t.Error("metaDataMap missing required key 'ownerEmail'") + t.Error("metaDataMap missing required key 'ownerEmail'") } if _, ok := metaDataMap["principalInvestigator"]; !ok { - t.Error("metaDataMap missing required key 'principalInvestigator'") + t.Error("metaDataMap missing required key 'principalInvestigator'") } if _, ok := metaDataMap["scientificMetadata"]; !ok { - t.Error("metaDataMap missing required key 'scientificMetadata'") + t.Error("metaDataMap missing required key 'scientificMetadata'") } scientificMetadata, ok := metaDataMap["scientificMetadata"].([]interface{}) if ok { @@ -124,41 +124,41 @@ func TestCheckMetadata(t *testing.T) { } func TestCheckMetadata_CrashCase(t *testing.T) { - // Define mock parameters for the function - var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" // TODO: Test Improvement. Change this to a mock server. At the moment, tests will fail if we change this to a mock server. - var APIServer = TEST_API_SERVER - var metadatafile3 = "testdata/metadata_illegal.json" - - // Mock HTTP client - client := &http.Client{ - Timeout: 5 * time.Second, // Set a timeout for requests - Transport: &http.Transport{ - // Customize the transport settings if needed (e.g., proxy, TLS config) - // For a dummy client, default settings are usually sufficient - }, - CheckRedirect: func(req *http.Request, via []*http.Request) error { - // Customize how redirects are handled if needed - // For a dummy client, default behavior is usually sufficient - return http.ErrUseLastResponse // Use the last response for redirects - }, - } - - // Mock user map - user := map[string]string{ - "displayName": "csaxsswissfel", - "mail": "testuser@example.com", - } - - // Mock access groups - accessGroups := []string{"group1", "group2"} - - // Call the function that should return an error - _, _, _, err := CheckMetadata(client, APIServer, metadatafile3, user, accessGroups) - - // Check that the function returned the expected error - if err == nil { - t.Fatal("Function did not return an error as expected") - } else if err.Error() != ErrIllegalKeys { - t.Errorf("Expected error %q, got %q", ErrIllegalKeys, err.Error()) - } + // Define mock parameters for the function + var TEST_API_SERVER string = "https://dacat-qa.psi.ch/api/v3" // TODO: Test Improvement. Change this to a mock server. At the moment, tests will fail if we change this to a mock server. + var APIServer = TEST_API_SERVER + var metadatafile3 = "testdata/metadata_illegal.json" + + // Mock HTTP client + client := &http.Client{ + Timeout: 5 * time.Second, // Set a timeout for requests + Transport: &http.Transport{ + // Customize the transport settings if needed (e.g., proxy, TLS config) + // For a dummy client, default settings are usually sufficient + }, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + // Customize how redirects are handled if needed + // For a dummy client, default behavior is usually sufficient + return http.ErrUseLastResponse // Use the last response for redirects + }, + } + + // Mock user map + user := map[string]string{ + "displayName": "csaxsswissfel", + "mail": "testuser@example.com", + } + + // Mock access groups + accessGroups := []string{"group1", "group2"} + + // Call the function that should return an error + _, _, _, err := CheckMetadata(client, APIServer, metadatafile3, user, accessGroups) + + // Check that the function returned the expected error + if err == nil { + t.Fatal("Function did not return an error as expected") + } else if err.Error() != ErrIllegalKeys+": \"description.\", \"name]\"" { + t.Errorf("Expected error %q, got %q", ErrIllegalKeys, err.Error()) + } }