Skip to content

Commit

Permalink
improve checkMetadata
Browse files Browse the repository at this point in the history
  • Loading branch information
consolethinks committed Aug 23, 2024
1 parent 96bbcb9 commit 8d7c67f
Show file tree
Hide file tree
Showing 2 changed files with 176 additions and 143 deletions.
235 changes: 134 additions & 101 deletions datasetIngestor/checkMetadata.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,12 @@ const raw = "raw"

func CheckMetadata(client *http.Client, APIServer string, metadatafile string, user map[string]string, accessGroups []string) (metaDataMap map[string]interface{}, sourceFolder string, beamlineAccount bool, err error) {
metaDataMap, err = readMetadataFromFile(metadatafile)
// TODO this error handler doesn't make sense for now considering the "readMetadataFromFile" directly shuts down execution, instead of returning an error.
if err != nil {
return nil, "", false, err
}

if containsIllegalKeys(metaDataMap) {
return nil, "", false, errors.New(ErrIllegalKeys)
if keys := collectIllegalKeys(metaDataMap); len(keys) > 0 {
return nil, "", false, errors.New(ErrIllegalKeys + ": \"" + strings.Join(keys, "\", \"") + "\"")
}

beamlineAccount, err = checkUserAndOwnerGroup(user, accessGroups, metaDataMap)
Expand All @@ -47,7 +46,7 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u
return nil, "", false, err
}

err = checkMetadataValidity(client, APIServer, metaDataMap, metaDataMap["type"].(string))
err = checkMetadataValidity(client, APIServer, metaDataMap)
if err != nil {
return nil, "", false, err
}
Expand All @@ -64,47 +63,49 @@ func CheckMetadata(client *http.Client, APIServer string, metadatafile string, u
func readMetadataFromFile(metadatafile string) (map[string]interface{}, error) {
b, err := os.ReadFile(metadatafile) // just pass the file name
if err != nil {
// TODO convert this into return
log.Fatal(err)
return nil, err
}
// Unmarshal the JSON metadata into an interface{} object.
var metadataObj interface{} // Using interface{} allows metadataObj to hold any type of data, since it has no defined methods.
err = json.Unmarshal(b, &metadataObj)
if err != nil {
// TODO convert this into return
log.Fatal(err)
return nil, err
}

// Use type assertion to convert the interface{} object to a map[string]interface{}.
metaDataMap := metadataObj.(map[string]interface{}) // `.(` is type assertion: a way to extract the underlying value of an interface and check whether it's of a specific type.
return metaDataMap, err
}

// NOTE the only keys that are invalid are the ones with illegal chars., uses recursion, doesn't return error info (eg. which key)...
func containsIllegalKeys(metadata map[string]interface{}) bool {
for key, value := range metadata {
if containsIllegalCharacters(key) {
return true
}
// collects keys with illegal characters
func collectIllegalKeys(metadata map[string]interface{}) []string {
stack := []map[string]interface{}{metadata}
keys := []string{}

switch v := value.(type) {
case map[string]interface{}:
if containsIllegalKeys(v) {
return true
for len(stack) > 0 {
item := stack[len(stack)-1]
stack = stack[:len(stack)-1]

for key, value := range item {
if containsIllegalCharacters(key) {
keys = append(keys, key)
}
case []interface{}:
for _, item := range v {
switch itemValue := item.(type) { // Type switch on array item
case map[string]interface{}:
if containsIllegalKeys(itemValue) {
return true

switch v := value.(type) {
case map[string]interface{}:
stack = append(stack, v)
case []interface{}:
for _, vItem := range v {
// convert this to switch if other types are needed
if vMap, ok := vItem.(map[string]interface{}); ok {
stack = append(stack, vMap)
}
// Add other cases if needed
}
}
}
}
return false

return keys
}

func containsIllegalCharacters(s string) bool {
Expand All @@ -120,54 +121,57 @@ func containsIllegalCharacters(s string) bool {

// checkUserAndOwnerGroup checks the user and owner group and returns whether the user is a beamline account.
func checkUserAndOwnerGroup(user map[string]string, accessGroups []string, metaDataMap map[string]interface{}) (bool, error) {
beamlineAccount := false

if user["displayName"] != "ingestor" {
// Check if the metadata contains the "ownerGroup" key.
if ownerGroup, ok := metaDataMap["ownerGroup"]; ok { // type assertion with a comma-ok idiom
validOwner := false
// Iterate over accessGroups to validate the owner group.
for _, b := range accessGroups {
if b == ownerGroup {
validOwner = true
break
}
}
if validOwner {
log.Printf("OwnerGroup information %s verified successfully.\n", ownerGroup)
} else {
// If the owner group is not valid, check for beamline-specific accounts.
if creationLocation, ok := metaDataMap["creationLocation"]; ok {
// Split the creationLocation string to extract beamline-specific information.
parts := strings.Split(creationLocation.(string), "/")
expectedAccount := ""
if len(parts) == 4 {
expectedAccount = strings.ToLower(parts[2]) + strings.ToLower(parts[3])
}
// If the user matches the expected beamline account, grant ingest access.
if user["displayName"] == expectedAccount {
log.Printf("Beamline specific dataset %s - ingest granted.\n", expectedAccount)
beamlineAccount = true
} else {
return false, fmt.Errorf("you are neither member of the ownerGroup %s nor the needed beamline account %s", ownerGroup, expectedAccount)
}
} else {
// for other data just check user name
// this is a quick and dirty test. Should be replaced by test for "globalaccess" role. TODO
// facilities: ["SLS", "SINQ", "SWISSFEL", "SmuS"],
u := user["displayName"]
if strings.HasPrefix(u, "sls") ||
strings.HasPrefix(u, "swissfel") ||
strings.HasPrefix(u, "sinq") ||
strings.HasPrefix(u, "smus") {
beamlineAccount = true
}
}
}
if user["displayName"] == "ingestor" {
return false, nil
}

// Check if the metadata contains the "ownerGroup" key.
ownerGroup, ok := metaDataMap["ownerGroup"]
if !ok {
// NOTE: so if there's no ownergroup, we can pass this check?
return false, nil
}

// Iterate over accessGroups to validate the owner group.
for _, b := range accessGroups {
if b == ownerGroup {
log.Printf("OwnerGroup information %s verified successfully.\n", ownerGroup)
return false, nil
}
}

return beamlineAccount, nil
// NOTE: beamline accounts seem to be very PSI specific.
// If the owner group is not valid, check for beamline-specific accounts.
if creationLocation, ok := metaDataMap["creationLocation"]; ok {
// Split the creationLocation string to extract beamline-specific information.
parts := strings.Split(creationLocation.(string), "/")
expectedAccount := ""
if len(parts) == 4 {
expectedAccount = strings.ToLower(parts[2]) + strings.ToLower(parts[3])
}
// If the user matches the expected beamline account, grant ingest access.
if user["displayName"] == expectedAccount {
log.Printf("Beamline specific dataset %s - ingest granted.\n", expectedAccount)
return true, nil
} else {
return false, fmt.Errorf("you are neither member of the ownerGroup %s nor the needed beamline account %s", ownerGroup, expectedAccount)
}
} else {
// for other data just check user name
// this is a quick and dirty test. Should be replaced by test for "globalaccess" role. TODO
// facilities: ["SLS", "SINQ", "SWISSFEL", "SmuS"],
u := user["displayName"]
if strings.HasPrefix(u, "sls") ||
strings.HasPrefix(u, "swissfel") ||
strings.HasPrefix(u, "sinq") ||
strings.HasPrefix(u, "smus") {
return true, nil
}
}

// NOTE: we can get to this part after the last else,
// this lacks an error state for not being a beamline account or part of an expected owner group?
return false, nil
}

// getHost is a function that attempts to retrieve and return the fully qualified domain name (FQDN) of the current host.
Expand Down Expand Up @@ -204,6 +208,8 @@ func getHost() string {
// augmentMissingMetadata augments missing metadata fields.
func augmentMissingMetadata(user map[string]string, metaDataMap map[string]interface{}, client *http.Client, APIServer string, accessGroups []string) error {
color.Set(color.FgGreen)
defer color.Unset()

// optionally augment missing owner metadata
if _, ok := metaDataMap["owner"]; !ok {
metaDataMap["owner"] = user["displayName"]
Expand All @@ -217,6 +223,7 @@ func augmentMissingMetadata(user map[string]string, metaDataMap map[string]inter
metaDataMap["contactEmail"] = user["mail"]
log.Printf("contactEmail field added: %s", metaDataMap["contactEmail"])
}

// and sourceFolderHost
if _, ok := metaDataMap["sourceFolderHost"]; !ok {
hostname := getHost()
Expand All @@ -227,44 +234,69 @@ func augmentMissingMetadata(user map[string]string, metaDataMap map[string]inter
log.Printf("sourceFolderHost field added: %s", metaDataMap["sourceFolderHost"])
}
}

// for raw data add PI if missing
if val, ok := metaDataMap["type"]; ok {
dstype, ok := val.(string)
if err := addPrincipalInvestigatorFromProposal(user, metaDataMap, client, APIServer, accessGroups); err != nil {
return err
}

return nil
}

func addPrincipalInvestigatorFromProposal(user map[string]string, metaDataMap map[string]interface{}, client *http.Client, APIServer string, accessGroups []string) error {
typeVal, ok := metaDataMap["type"]
if !ok {
return nil
}
dstype, ok := typeVal.(string)
if !ok {
return fmt.Errorf("type is not a string")
}
if dstype != raw {
// exit if not raw
return nil
}

if _, ok := metaDataMap["principalInvestigator"]; ok {
return nil
}

val, ok := metaDataMap["ownerGroup"]
if !ok {
return nil
}

ownerGroup, ok := val.(string)
if !ok {
return fmt.Errorf("ownerGroup is not a string")
}

proposal, err := datasetUtils.GetProposal(client, APIServer, ownerGroup, user, accessGroups)
if err != nil {
return fmt.Errorf("error: %v", err)
}

if val, ok := proposal["pi_email"]; ok {
piEmail, ok := val.(string)
if !ok {
return fmt.Errorf("type is not a string")
}
if dstype == raw {
if _, ok := metaDataMap["principalInvestigator"]; !ok {
val, ok := metaDataMap["ownerGroup"]
if ok {
ownerGroup, ok := val.(string)
if !ok {
return fmt.Errorf("ownerGroup is not a string")
}
proposal, err := datasetUtils.GetProposal(client, APIServer, ownerGroup, user, accessGroups)
if err != nil {
log.Fatalf("Error: %v\n", err)
}
if val, ok := proposal["pi_email"]; ok {
piEmail, ok := val.(string)
if !ok {
return fmt.Errorf("pi_email is not a string")
}
metaDataMap["principalInvestigator"] = piEmail
log.Printf("principalInvestigator field added: %s", metaDataMap["principalInvestigator"])
} else {
log.Printf("principalInvestigator field missing for raw data and could not be added from proposal data.")
log.Printf("Please add the field explicitly to metadata file")
}
}
}
return fmt.Errorf("pi_email is not a string")
}
metaDataMap["principalInvestigator"] = piEmail
log.Printf("principalInvestigator field added: %s", metaDataMap["principalInvestigator"])
} else {
log.Printf("principalInvestigator field missing for raw data and could not be added from proposal data.")
log.Printf("Please add the field explicitly to metadata file")
}
return nil
}

// checkMetadataValidity checks the validity of the metadata by calling the appropriate API.
func checkMetadataValidity(client *http.Client, APIServer string, metaDataMap map[string]interface{}, dstype string) error {
func checkMetadataValidity(client *http.Client, APIServer string, metaDataMap map[string]interface{}) error {
dstype, ok := metaDataMap["type"].(string)
if !ok {
return fmt.Errorf("metadata type isn't a string")
}

myurl := ""
switch dstype {
case raw:
Expand Down Expand Up @@ -356,6 +388,7 @@ func getSourceFolder(metaDataMap map[string]interface{}) (string, error) {
return "", errors.New("sourceFolder is not a string")
}

// NOTE: this part seems very PSI specific
parts := strings.Split(sourceFolder, "/")
if len(parts) > 3 && parts[3] == "data" && parts[1] == "sls" {
var err error
Expand Down
Loading

0 comments on commit 8d7c67f

Please sign in to comment.