diff --git a/agent-install/agent-install.sh b/agent-install/agent-install.sh index 114a4791f..38cacf8fe 100755 --- a/agent-install/agent-install.sh +++ b/agent-install/agent-install.sh @@ -61,6 +61,7 @@ CLUSTER_ROLE_BINDING_NAME="openhorizon-agent-cluster-rule" ROLE_BINDING_NAME="role-binding" DEPLOYMENT_NAME="agent" SECRET_NAME="openhorizon-agent-secrets" +IMAGE_PULL_SECRET_NAME="registry-creds" CRONJOB_AUTO_UPGRADE_NAME="auto-upgrade-cronjob" IMAGE_REGISTRY_SECRET_NAME="openhorizon-agent-secrets-docker-cert" CONFIGMAP_NAME="openhorizon-agent-config" @@ -73,6 +74,7 @@ EDGE_CLUSTER_TAR_FILE_NAME='horizon-agent-edge-cluster-files.tar.gz' # The following variables will need to have the $ARCH prepended before they can be used DEFAULT_AGENT_K8S_IMAGE_TAR_FILE='_anax_k8s.tar.gz' DEFAULT_CRONJOB_AUTO_UPGRADE_K8S_TAR_FILE='_auto-upgrade-cronjob_k8s.tar.gz' +DEFAULT_INIT_CONTAINER_IMAGE_PATH="public.ecr.aws/docker/library/alpine:latest" # agent upgrade types. To update the certificate only, just do "-G cert" or set AGENT_UPGRADE_TYPES="cert" UPGRADE_TYPE_SW="software" @@ -150,6 +152,7 @@ Optional Edge Device Environment Variables For Testing New Distros - Not For Pro Additional Edge Cluster Variables (in environment or config file): IMAGE_ON_EDGE_CLUSTER_REGISTRY: override the agent image path (without tag) if you want it to be different from what this script will default it to CRONJOB_AUTO_UPGRADE_IMAGE_ON_EDGE_CLUSTER_REGISTRY: override the auto-upgrade-cronjob cronjob image path (without tag) if you want it to be different from what this script will default it to + INIT_CONTAINER_IMAGE: specify this value if init container is needed and is different from default: public.ecr.aws/docker/library/alpine:latest EDGE_CLUSTER_REGISTRY_USERNAME: specify this value if the edge cluster registry requires authentication EDGE_CLUSTER_REGISTRY_TOKEN: specify this value if the edge cluster registry requires authentication EDGE_CLUSTER_STORAGE_CLASS: the storage class to use for the 
agent and edge services. Default: gp2 @@ -1181,7 +1184,7 @@ function get_all_variables() { get_variable EDGE_CLUSTER_STORAGE_CLASS 'gp2' get_variable AGENT_NAMESPACE "$DEFAULT_AGENT_NAMESPACE" get_variable NAMESPACE_SCOPED 'false' - USE_EDGE_CLUSTER_REGISTRY='true' #get_variable USE_EDGE_CLUSTER_REGISTRY 'true' # currently true is the only supported value + get_variable USE_EDGE_CLUSTER_REGISTRY 'true' get_variable AGENT_DEPLOYMENT_STATUS_TIMEOUT_SECONDS '75' local image_arch=$(get_cluster_image_arch) @@ -1189,33 +1192,42 @@ function get_all_variables() { DEFAULT_AGENT_K8S_IMAGE_TAR_FILE=${image_arch}${DEFAULT_AGENT_K8S_IMAGE_TAR_FILE} DEFAULT_CRONJOB_AUTO_UPGRADE_K8S_TAR_FILE=${image_arch}${DEFAULT_CRONJOB_AUTO_UPGRADE_K8S_TAR_FILE} + local default_image_registry_on_edge_cluster + local default_auto_upgrade_cronjob_image_registry_on_edge_cluster + isImageVariableRequired=true if [[ "$USE_EDGE_CLUSTER_REGISTRY" == "true" ]]; then - local default_image_registry_on_edge_cluster - local default_auto_upgrade_cronjob_image_registry_on_edge_cluster if [[ $KUBECTL == "microk8s.kubectl" ]]; then default_image_registry_on_edge_cluster="localhost:32000/$AGENT_NAMESPACE/${image_arch}_anax_k8s" + isImageVariableRequired=false elif [[ $KUBECTL == "k3s kubectl" ]]; then local k3s_registry_endpoint=$($KUBECTL get service docker-registry-service | grep docker-registry-service | awk '{print $3;}'):5000 default_image_registry_on_edge_cluster="$k3s_registry_endpoint/$AGENT_NAMESPACE/${image_arch}_anax_k8s" + isImageVariableRequired=false elif is_ocp_cluster; then local ocp_registry_endpoint=$($KUBECTL get route default-route -n openshift-image-registry --template='{{ .spec.host }}') default_image_registry_on_edge_cluster="$ocp_registry_endpoint/$AGENT_NAMESPACE/${image_arch}_anax_k8s" - else - isImageVariableRequired=true + isImageVariableRequired=false fi + # image variable $IMAGE_ON_EDGE_CLUSTER_REGISTRY is required - get_variable IMAGE_ON_EDGE_CLUSTER_REGISTRY 
"$default_image_registry_on_edge_cluster" ${isImageVariableRequired} - log_debug "default_image_registry_on_edge_cluster: $default_image_registry_on_edge_cluster, IMAGE_ON_EDGE_CLUSTER_REGISTRY: $IMAGE_ON_EDGE_CLUSTER_REGISTRY" - # set $default_auto_upgrade_cronjob_image_registry_on_edge_cluster from IMAGE_ON_EDGE_CLUSTER_REGISTRY - default_auto_upgrade_cronjob_image_registry_on_edge_cluster="${IMAGE_ON_EDGE_CLUSTER_REGISTRY%/*}/${image_arch}_auto-upgrade-cronjob_k8s" - log_debug "default_auto_upgrade_cronjob_image_registry_on_edge_cluster: $default_auto_upgrade_cronjob_image_registry_on_edge_cluster" - get_variable CRONJOB_AUTO_UPGRADE_IMAGE_ON_EDGE_CLUSTER_REGISTRY "$default_auto_upgrade_cronjob_image_registry_on_edge_cluster" - get_variable EDGE_CLUSTER_REGISTRY_USERNAME - get_variable EDGE_CLUSTER_REGISTRY_TOKEN get_variable INTERNAL_URL_FOR_EDGE_CLUSTER_REGISTRY - get_variable AGENT_K8S_IMAGE_TAR_FILE "$DEFAULT_AGENT_K8S_IMAGE_TAR_FILE" - get_variable CRONJOB_AUTO_UPGRADE_K8S_TAR_FILE "$DEFAULT_CRONJOB_AUTO_UPGRADE_K8S_TAR_FILE" fi + # not use edge cluster local registry, use remote + get_variable IMAGE_ON_EDGE_CLUSTER_REGISTRY "$default_image_registry_on_edge_cluster" ${isImageVariableRequired} + log_debug "default_image_registry_on_edge_cluster: $default_image_registry_on_edge_cluster, IMAGE_ON_EDGE_CLUSTER_REGISTRY: $IMAGE_ON_EDGE_CLUSTER_REGISTRY" + + # set $CRONJOB_AUTO_UPGRADE_IMAGE_ON_EDGE_CLUSTER_REGISTRY from $IMAGE_ON_EDGE_CLUSTER_REGISTRY + auto_upgrade_cronjob_image_registry_on_edge_cluster="${IMAGE_ON_EDGE_CLUSTER_REGISTRY%%_*}_auto-upgrade-cronjob_k8s" + get_variable CRONJOB_AUTO_UPGRADE_IMAGE_ON_EDGE_CLUSTER_REGISTRY "$auto_upgrade_cronjob_image_registry_on_edge_cluster" + get_variable INIT_CONTAINER_IMAGE "$DEFAULT_INIT_CONTAINER_IMAGE_PATH" + + get_variable EDGE_CLUSTER_REGISTRY_USERNAME + get_variable EDGE_CLUSTER_REGISTRY_TOKEN + if [[ ( -z $EDGE_CLUSTER_REGISTRY_USERNAME && -n $EDGE_CLUSTER_REGISTRY_TOKEN ) || ( -n 
$EDGE_CLUSTER_REGISTRY_USERNAME && -z $EDGE_CLUSTER_REGISTRY_TOKEN ) ]]; then + log_fatal 1 "EDGE_CLUSTER_REGISTRY_USERNAME and EDGE_CLUSTER_REGISTRY_TOKEN should be set/unset together" + fi + get_variable AGENT_K8S_IMAGE_TAR_FILE "$DEFAULT_AGENT_K8S_IMAGE_TAR_FILE" + get_variable CRONJOB_AUTO_UPGRADE_K8S_TAR_FILE "$DEFAULT_CRONJOB_AUTO_UPGRADE_K8S_TAR_FILE" else log_fatal 1 "Invalid AGENT_DEPLOY_TYPE value: $AGENT_DEPLOY_TYPE" fi @@ -1439,7 +1451,7 @@ function is_small_kube() { } function is_ocp_cluster() { - $KUBECTL get routes default-route -n openshift-image-registry >/dev/null 2>&1 + $KUBECTL get console -n openshift-console >/dev/null 2>&1 if [[ $? -ne 0 ]]; then return 1 # we couldn't get the default route in openshift-image-registry namespace, so the current cluster is not ocp else return 0; fi } @@ -3369,13 +3381,14 @@ function loadClusterAgentImage() { # use the same tag for the image in the edge cluster registry as the tag they used for the image in the inputted tar file IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY="$IMAGE_ON_EDGE_CLUSTER_REGISTRY:$AGENT_IMAGE_VERSION_IN_TAR" + log_debug "IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY is set to: $IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY" + log_debug "loadClusterAgentImage() end" } -# Cluster only: to push agent and cronjob images to image registry that edge cluster can access -function pushImagesToEdgeClusterRegistry() { - log_debug "pushImagesToEdgeClusterRegistry() begin" - +# Cluster only: to set $EDGE_CLUSTER_REGISTRY_HOST, and login to registry +function getImageRegistryInfo() { + log_debug "getImageRegistryInfo() begin" # split $IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY by "/" EDGE_CLUSTER_REGISTRY_HOST=$(echo $IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY | awk -F'/' '{print $1}') log_info "Edge cluster registry host: $EDGE_CLUSTER_REGISTRY_HOST" @@ -3388,6 +3401,13 @@ function pushImagesToEdgeClusterRegistry() { chk $? 
"logging into edge cluster's registry: $EDGE_CLUSTER_REGISTRY_HOST" fi + log_debug "getImageRegistryInfo() end" +} + +# Cluster only: to push agent and cronjob images to image registry that edge cluster can access +function pushImagesToEdgeClusterRegistry() { + log_debug "pushImagesToEdgeClusterRegistry() begin" + log_info "Pushing docker image $AGENT_IMAGE to $IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY ..." ${DOCKER_ENGINE} tag ${AGENT_IMAGE} ${IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY} runCmdQuietly ${DOCKER_ENGINE} push ${IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY} @@ -3449,15 +3469,34 @@ function loadClusterAgentAutoUpgradeCronJobImage() { log_debug "loadClusterAgentAutoUpgradeCronJobImage() end" } -function contains_namespace() { - local namespaces=$1 - for t in $namespaces - do - if [ "$t" == "$AGENT_NAMESPACE" ]; then - return 0 +# Cluster only: create image pull secrets if use remote private registry, this function is called when USE_EDGE_CLUSTER_REGISTRY=false, and remote registry is private +# Side-effect: set $USE_PRIVATE_REGISTRY +function create_image_pull_secrets() { + log_debug "create_image_pull_secrets() begin" + if [[ "$USE_EDGE_CLUSTER_REGISTRY" == "false" ]]; then + if [[ -n $EDGE_CLUSTER_REGISTRY_USERNAME && -n $EDGE_CLUSTER_REGISTRY_TOKEN && -n $EDGE_CLUSTER_REGISTRY_HOST ]]; then + log_verbose "checking if private registry is accessible..." + echo "$EDGE_CLUSTER_REGISTRY_TOKEN" | ${DOCKER_ENGINE} login -u $EDGE_CLUSTER_REGISTRY_USERNAME --password-stdin $EDGE_CLUSTER_REGISTRY_HOST + chk $? "logging into remote private registry: $EDGE_CLUSTER_REGISTRY_HOST" + + log_verbose "checking if secret ${IMAGE_PULL_SECRET_NAME} exist..." + USE_PRIVATE_REGISTRY="true" + + if $KUBECTL get secret ${IMAGE_PULL_SECRET_NAME} -n ${AGENT_NAMESPACE} 2>/dev/null; then + $KUBECTL delete secret ${IMAGE_PULL_SECRET_NAME} -n ${AGENT_NAMESPACE} >/dev/null 2>&1 + chk $? 
"deleting image pull secret before installing" + fi + + log_verbose "creating image pull secrets ${IMAGE_PULL_SECRET_NAME}..." + $KUBECTL create secret docker-registry ${IMAGE_PULL_SECRET_NAME} -n ${AGENT_NAMESPACE} --docker-server=${EDGE_CLUSTER_REGISTRY_HOST} --docker-username=${EDGE_CLUSTER_REGISTRY_USERNAME} --docker-password=${EDGE_CLUSTER_REGISTRY_TOKEN} --docker-email="" + chk $? "creating image pull secrets ${IMAGE_PULL_SECRET_NAME} from edge cluster registry info" + log_info "secret ${IMAGE_PULL_SECRET_NAME} created" + else + log_info "EDGE_CLUSTER_REGISTRY_USERNAME and/or EDGE_CLUSTER_REGISTRY_TOKEN is not specified, skip creating image pull secrets $IMAGE_PULL_SECRET_NAME" fi - done - return 1 + fi + + log_debug "create_image_pull_secrets() end" } # Cluster only: check if there is scope conflict @@ -3481,10 +3520,8 @@ function check_cluster_agent_scope() { # check namespace namespaces_have_agent=$($KUBECTL get deployment --field-selector metadata.name=agent -A -o jsonpath="{.items[*].metadata.namespace}" | tr -s '[[:space:]]' ',') log_info "Already have agent deployment in namespaces: $namespaces_have_agent, checking scope of existing agent" - - IFS=',' read -r -a namespace_array <<< "$namespaces_have_agent" - - if contains_namespace $namespace_array; then + + if [[ "$namespaces_have_agent" == *"$AGENT_NAMESPACE"* ]]; then log_debug "Namespaces array contains current namespace" # continue to check_agent_deployment_exist() to check scope AGENT_DEPLOYMENT_EXIST_IN_SAME_NAMESPACE="true" @@ -3498,6 +3535,7 @@ function check_cluster_agent_scope() { log_fatal 3 "One or more agents detected in $namespaces_have_agent. 
A cluster scoped agent cannot be installed to the same cluster that has agent(s) already" fi + IFS="," read -ra namespace_array <<< "$namespaces_have_agent" namespace_to_check=${namespace_array[0]} local namespace_scoped_env_value_in_use=$($KUBECTL get deployment agent -n ${namespace_to_check} -o jsonpath='{.spec.template.spec.containers[0].env}' | jq -r '.[] | select(.name=="HZN_NAMESPACE_SCOPED").value') log_debug "Current HZN_NAMESPACE_SCOPED in agent deployment under namespace $namespace_to_check is: $namespace_scoped_env_value_in_use" @@ -3551,6 +3589,22 @@ function check_agent_deployment_exist() { # check 1) agent image in deployment # eg: {image-registry}:5000/{repo}/{image-name}:{version} local agent_image_in_use=$($KUBECTL get deployment agent -o jsonpath='{$.spec.template.spec.containers[:1].image}' -n ${AGENT_NAMESPACE}) + + # {image-registry}:5000/{repo} + local agent_image_on_edge_cluster_registry=${agent_image_in_use%:*} + if [[ "$agent_image_on_edge_cluster_registry" != "$IMAGE_ON_EDGE_CLUSTER_REGISTRY" ]]; then + log_fatal 3 "Current deployment image registry cannot be updated, please run agent-uninstall.sh and re-run agent-install.sh" + fi + + local image_pull_secrets_length=$($KUBECTL get deployment agent -n agent-ns4 -o jsonpath='{.spec.template.spec.imagePullSecrets}' | jq length) + local use_image_pull_secrets + if [[ "$image_pull_secrets_length" == "1" ]]; then + use_image_pull_secrets="true" + fi + if [[ "$use_image_pull_secrets" != "$USE_PRIVATE_REGISTRY" ]]; then + log_fatal 3 "Current deployment image registry pull secrets info cannot be updated, please run agent-uninstall.sh and re-run agent-install.sh" + fi + # {image-name}:{version} local agent_image_name_with_tag=$(echo $agent_image_in_use | awk -F'/' '{print $3}') # {version} @@ -3565,6 +3619,13 @@ function check_agent_deployment_exist() { # check 2) auto-upgrade-cronjob cronjob image in cronjob yml # eg: {image-registry}:5000/{repo}/{image-name}:{version} local 
auto_upgrade_cronjob_image_in_use=$($KUBECTL get cronjob ${CRONJOB_AUTO_UPGRADE_NAME} -o jsonpath='{$.spec.jobTemplate.spec.template.spec.containers[:1].image}' -n ${AGENT_NAMESPACE}) + + # {image-registry}:5000/{repo} + local auto_upgrade_cronjob_image_on_edge_cluster_registry=${auto_upgrade_cronjob_image_in_use%:*} + if [[ "$auto_upgrade_cronjob_image_on_edge_cluster_registry" != "$CRONJOB_AUTO_UPGRADE_IMAGE_ON_EDGE_CLUSTER_REGISTRY" ]]; then + log_fatal 3 "Current auto-upgrade-cronjob cronjob image registry cannot be updated, please run agent-uninstall.sh and re-run agent-install.sh" + fi + # {image-name}:{version} local auto_upgrade_cronjob_image_name_with_tag=$(echo $auto_upgrade_cronjob_image_in_use | awk -F'/' '{print $3}') # {version} @@ -3695,13 +3756,13 @@ function prepare_k8s_deployment_file() { sed -i -e '{/START_CERT_VOL/,/END_CERT_VOL/d;}' deployment-template.yml fi - sed -e "s#__AgentNameSpace__#\"${AGENT_NAMESPACE}\"#g" -e "s#__NamespaceScoped__#\"${NAMESPACE_SCOPED}\"#g" -e "s#__OrgId__#\"${HZN_ORG_ID}\"#g" deployment-template.yml >deployment.yml + sed -e "s#__AgentNameSpace__#\"${AGENT_NAMESPACE}\"#g" -e "s#__InitContainerImagePath__#${INIT_CONTAINER_IMAGE}#g" -e "s#__NamespaceScoped__#\"${NAMESPACE_SCOPED}\"#g" -e "s#__OrgId__#\"${HZN_ORG_ID}\"#g" deployment-template.yml >deployment.yml chk $? 
'creating deployment.yml' if [[ "$USE_EDGE_CLUSTER_REGISTRY" == "true" ]]; then + sed -i -e '{/START_REMOTE_ICR/,/END_REMOTE_ICR/d;}' deployment.yml EDGE_CLUSTER_REGISTRY_PROJECT_NAME=$(echo $IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY | awk -F'/' '{print $2}') EDGE_CLUSTER_AGENT_IMAGE_AND_TAG=$(echo $IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY | awk -F'/' '{print $3}') - local image_full_path_on_edge_cluster_registry_internal_url if [[ "$INTERNAL_URL_FOR_EDGE_CLUSTER_REGISTRY" == "" ]]; then if is_ocp_cluster; then @@ -3726,7 +3787,20 @@ function prepare_k8s_deployment_file() { sed -i -e "s#__ImageRegistryHost__#${EDGE_CLUSTER_REGISTRY_HOST}#g" deployment.yml fi else - log_fatal 1 "Agent install on edge cluster requires using an edge cluster registry" + log_info "This agent install on edge cluster is using a remote registry: $IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY" + + log_info "Checking if image exists in remote registry..." + set +e + docker manifest inspect $IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY >/dev/null 2>&1 + chk $? "checking existence of image $IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY" + set -e + + sed -i -e "s#__ImagePath__#${IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY}#g" deployment.yml + + if [[ "$USE_PRIVATE_REGISTRY" != "true" ]]; then + log_debug "remote image registry is not private, remove ImagePullSecret..." + sed -i -e '{/START_REMOTE_ICR/,/END_REMOTE_ICR/d;}' deployment.yml + fi fi log_debug "prepare_k8s_deployment_file() end" @@ -3750,6 +3824,7 @@ function prepare_k8s_auto_upgrade_cronjob_file() { chk $? 
'creating auto-upgrade-cronjob.yml' if [[ "$USE_EDGE_CLUSTER_REGISTRY" == "true" ]]; then + sed -i -e '{/START_REMOTE_ICR/,/END_REMOTE_ICR/d;}' auto-upgrade-cronjob.yml EDGE_CLUSTER_REGISTRY_PROJECT_NAME=$(echo $CRONJOB_AUTO_UPGRADE_IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY | awk -F'/' '{print $2}') EDGE_CLUSTER_CRONJOB_AUTO_UPGRADE_IMAGE_AND_TAG=$(echo $CRONJOB_AUTO_UPGRADE_IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY | awk -F'/' '{print $3}') @@ -3766,7 +3841,19 @@ function prepare_k8s_auto_upgrade_cronjob_file() { fi sed -i -e "s#__ImagePath__#${auto_upgrade_cronjob_image_full_path_on_edge_cluster_registry_internal_url}#g" auto-upgrade-cronjob.yml else - log_fatal 1 "Agent install on edge cluster requires using an edge cluster registry" + log_info "This agent install on edge cluster is using a remote registry: $CRONJOB_AUTO_UPGRADE_IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY" + log_info "Checking if image exists in remote registry..." + set +e + docker manifest inspect $CRONJOB_AUTO_UPGRADE_IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY >/dev/null 2>&1 + chk $? "checking existence of image $CRONJOB_AUTO_UPGRADE_IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY" + set -e + + sed -i -e "s#__ImagePath__#${CRONJOB_AUTO_UPGRADE_IMAGE_FULL_PATH_ON_EDGE_CLUSTER_REGISTRY}#g" auto-upgrade-cronjob.yml + + if [[ "$USE_PRIVATE_REGISTRY" != "true" ]]; then + log_debug "remote image registry is not private, remove ImagePullSecret..." 
+ sed -i -e '{/START_REMOTE_ICR/,/END_REMOTE_ICR/d;}' auto-upgrade-cronjob.yml + fi fi log_debug "prepare_k8s_auto_upgrade_cronjob_file() end" @@ -4216,7 +4303,7 @@ function patch_deployment_with_image_registry_volume() { log_debug "patch_deployment_with_image_registry_volume() begin" $KUBECTL patch deployment agent -n ${AGENT_NAMESPACE} -p "{\"spec\":{\"template\":{\"spec\":{\"volumes\":[{\"name\": \ - \"agent-docker-cert-volume\",\"secret\":{\"secretName\":\"openhorizon-agent-secrets-docker-cert\"}}], \ + \"agent-docker-cert-volume\",\"secret\":{\"secretName\":\"${IMAGE_REGISTRY_SECRET_NAME}\"}}], \ \"containers\":[{\"name\":\"anax\",\"volumeMounts\":[{\"mountPath\":\"/etc/docker/certs.d/${EDGE_CLUSTER_REGISTRY_HOST}\" \ ,\"name\":\"agent-docker-cert-volume\"},{\"mountPath\":\"/etc/docker/certs.d/${DEFAULT_OCP_INTERNAL_URL_FOR_EDGE_CLUSTER_REGISTRY}\" \ ,\"name\":\"agent-docker-cert-volume\"}],\"env\":[{\"name\":\"SSL_CERT_FILE\",\"value\":\"/etc/docker/certs.d/${EDGE_CLUSTER_REGISTRY_HOST}/ca.crt\"}]}]}}}}" @@ -4236,6 +4323,8 @@ function install_update_cluster() { loadClusterAgentImage # create the cluster agent docker image locally loadClusterAgentAutoUpgradeCronJobImage # create the cluster cronjob docker images locally + getImageRegistryInfo # set $EDGE_CLUSTER_REGISTRY_HOST, and login to registry + # push agent and cronjob images to cluster's registry if [[ "$USE_EDGE_CLUSTER_REGISTRY" == "true" ]]; then if is_ocp_cluster; then @@ -4243,6 +4332,10 @@ function install_update_cluster() { create_image_stream fi pushImagesToEdgeClusterRegistry + else + log_info "Use remote registry" + create_namespace + create_image_pull_secrets # create image pull secrets if use private registry (if edge cluster registry username/password are provided), sets USE_PRIVATE_REGISTRY fi if [[ "$AGENT_DEPLOYMENT_EXIST_IN_SAME_NAMESPACE" == "true" ]]; then @@ -4281,7 +4374,7 @@ function install_cluster() { create_deployment check_deployment_status - if is_ocp_cluster; then + if 
is_ocp_cluster && [[ "$USE_EDGE_CLUSTER_REGISTRY" == "true" ]]; then # setup image registry cert. This will patch the running deployment local isUpdate='false' setup_cluster_image_registry_cert $isUpdate @@ -4321,7 +4414,7 @@ function update_cluster() { update_deployment check_deployment_status - if is_ocp_cluster; then + if is_ocp_cluster && [[ "$USE_EDGE_CLUSTER_REGISTRY" == "true" ]]; then # setup image registry cert. This will patch the running deployment local isUpdate='true' setup_cluster_image_registry_cert $isUpdate diff --git a/agent-install/agent-uninstall.sh b/agent-install/agent-uninstall.sh index 0db231102..0b14ec975 100644 --- a/agent-install/agent-uninstall.sh +++ b/agent-install/agent-uninstall.sh @@ -8,6 +8,7 @@ DEPLOYMENT_NAME="agent" SERVICE_ACCOUNT_NAME="agent-service-account" CLUSTER_ROLE_BINDING_NAME="openhorizon-agent-cluster-rule" SECRET_NAME="openhorizon-agent-secrets" +IMAGE_PULL_SECRET_NAME="registry-creds" IMAGE_REGISTRY_SECRET_NAME="openhorizon-agent-secrets-docker-cert" CONFIGMAP_NAME="openhorizon-agent-config" PVC_NAME="openhorizon-agent-pvc" @@ -362,6 +363,7 @@ function deleteAgentResources() { log_info "Deleting secret..." 
$KUBECTL delete secret $SECRET_NAME -n $AGENT_NAMESPACE $KUBECTL delete secret $IMAGE_REGISTRY_SECRET_NAME -n $AGENT_NAMESPACE + $KUBECTL delete secret $IMAGE_PULL_SECRET_NAME -n $AGENT_NAMESPACE $KUBECTL delete secret ${SECRET_NAME}-backup -n $AGENT_NAMESPACE set -e diff --git a/agent-install/k8s/auto-upgrade-cronjob-template.yml b/agent-install/k8s/auto-upgrade-cronjob-template.yml index aa8848676..7416d0e2e 100644 --- a/agent-install/k8s/auto-upgrade-cronjob-template.yml +++ b/agent-install/k8s/auto-upgrade-cronjob-template.yml @@ -2,6 +2,9 @@ apiVersion: __KubernetesApi__ kind: CronJob metadata: name: auto-upgrade-cronjob + labels: + app: agent + openhorizon.org/component: agent spec: schedule: '*/1 * * * *' concurrencyPolicy: Forbid @@ -11,11 +14,19 @@ spec: spec: backoffLimit: 0 template: + metadata: + labels: + app: agent + openhorizon.org/component: agent spec: volumes: - name: agent-pvc-storage persistentVolumeClaim: claimName: openhorizon-agent-pvc + # START_REMOTE_ICR + imagePullSecrets: + - name: registry-creds + # END_REMOTE_ICR containers: - name: agent-auto-upgrade securityContext: @@ -33,7 +44,7 @@ spec: - '-c' - >- /usr/local/bin/auto-upgrade-cronjob.sh - imagePullPolicy: Always + imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /var/horizon name: agent-pvc-storage diff --git a/agent-install/k8s/deployment-template.yml b/agent-install/k8s/deployment-template.yml index c8dba211f..667de63a9 100644 --- a/agent-install/k8s/deployment-template.yml +++ b/agent-install/k8s/deployment-template.yml @@ -3,15 +3,20 @@ kind: Deployment metadata: name: agent namespace: __AgentNameSpace__ + labels: + app: agent + openhorizon.org/component: agent spec: replicas: 1 selector: matchLabels: app: agent + openhorizon.org/component: agent template: metadata: labels: app: agent + openhorizon.org/component: agent spec: serviceAccountName: agent-service-account volumes: @@ -29,7 +34,8 @@ spec: # START_NOT_FOR_OCP initContainers: - name: initcontainer - image: 
alpine:latest + image: __InitContainerImagePath__ + imagePullPolicy: IfNotPresent securityContext: runAsNonRoot: false command: @@ -44,10 +50,14 @@ spec: name: agent-pvc-storage subPath: horizon # END_NOT_FOR_OCP + # START_REMOTE_ICR + imagePullSecrets: + - name: registry-creds + # END_REMOTE_ICR containers: - name: anax image: __ImagePath__ - imagePullPolicy: Always + imagePullPolicy: IfNotPresent volumeMounts: - mountPath: /etc/default/horizon subPath: horizon diff --git a/clusterupgrade/cluster_upgrade_worker.go b/clusterupgrade/cluster_upgrade_worker.go index 5e82fb541..d82a623f3 100644 --- a/clusterupgrade/cluster_upgrade_worker.go +++ b/clusterupgrade/cluster_upgrade_worker.go @@ -50,7 +50,8 @@ const ( ) const ( - DEFAULT_CERT_PATH = "/etc/default/cert/" + DEFAULT_CERT_PATH = "/etc/default/cert/" + DEFAULT_IMAGE_REGISTRY_IN_DEPLOYMENT = "__ImageRegistryHost__" ) const ( @@ -702,8 +703,9 @@ func checkAgentImage(kubeClient *KubeClient, workDir string) (bool, string, stri } glog.Infof(cuwlog(fmt.Sprintf("Get image %v from tar file, extracted image tag: %v", fullImageTag, imageTag))) - if currentAgentVersion != imageTag { - // push image to image registry + if currentAgentVersion != imageTag && !agentUseRemoteRegistry() { + // push image to image registry if use edge cluster local registry + // If AGENT_CLUSTER_IMAGE_REGISTRY_HOST env is not set, it means agent is using remote image registry, and no need to push image imageRegistry := os.Getenv("AGENT_CLUSTER_IMAGE_REGISTRY_HOST") if imageRegistry == "" { return false, "", "", fmt.Errorf("failed to get edge cluster image registry host from environment veriable: %v", imageRegistry) @@ -751,7 +753,6 @@ func checkAgentImage(kubeClient *KubeClient, workDir string) (bool, string, stri } glog.Infof(cuwlog(fmt.Sprintf("Successfully pushed image %v", newImageRepoWithTag))) } - return (currentAgentVersion == imageTag), imageTag, currentAgentVersion, nil } @@ -767,3 +768,12 @@ func checkAgentImageAgainstStatusFile(workDir 
string) (bool, error) { return true, nil } } + +func agentUseRemoteRegistry() bool { + useRemoteRegistry := false + imageRegistry := os.Getenv("AGENT_CLUSTER_IMAGE_REGISTRY_HOST") + if imageRegistry == DEFAULT_IMAGE_REGISTRY_IN_DEPLOYMENT { + useRemoteRegistry = true + } + return useRemoteRegistry +} diff --git a/kube_operator/api_objects.go b/kube_operator/api_objects.go index 852c1bdef..3e3c99756 100644 --- a/kube_operator/api_objects.go +++ b/kube_operator/api_objects.go @@ -91,36 +91,28 @@ func sortAPIObjects(allObjects []APIObjects, customResources map[string][]*unstr return objMap, namespace, fmt.Errorf(kwlog(fmt.Sprintf("Error: rolebinding object has unrecognized type %T: %v", obj.Object, obj.Object))) } case K8S_CLUSTER_ROLE_TYPE: - if !cutil.IsNamespaceScoped() { - if typedRole, ok := obj.Object.(*rbacv1.ClusterRole); ok { - newRole := ClusterRoleRbacV1{ClusterRoleObject: typedRole} - if newRole.Name() != "" { - glog.V(4).Infof(kwlog(fmt.Sprintf("Found kubernetes cluster role object %s.", newRole.Name()))) - objMap[K8S_CLUSTER_ROLE_TYPE] = append(objMap[K8S_CLUSTER_ROLE_TYPE], newRole) - } else { - return objMap, namespace, fmt.Errorf(kwlog(fmt.Sprintf("Error: cluster role object must have a name in its metadata section."))) - } + if typedRole, ok := obj.Object.(*rbacv1.ClusterRole); ok { + newRole := ClusterRoleRbacV1{ClusterRoleObject: typedRole} + if newRole.Name() != "" { + glog.V(4).Infof(kwlog(fmt.Sprintf("Found kubernetes cluster role object %s.", newRole.Name()))) + objMap[K8S_CLUSTER_ROLE_TYPE] = append(objMap[K8S_CLUSTER_ROLE_TYPE], newRole) } else { - return objMap, namespace, fmt.Errorf(kwlog(fmt.Sprintf("Error: cluster role object has unrecognized type %T: %v", obj.Object, obj.Object))) + return objMap, namespace, fmt.Errorf(kwlog(fmt.Sprintf("Error: cluster role object must have a name in its metadata section."))) } } else { - glog.Warningf(kwlog(fmt.Sprintf("Ignore cluster role object because this agent is Namespace-scoped."))) + return 
objMap, namespace, fmt.Errorf(kwlog(fmt.Sprintf("Error: cluster role object has unrecognized type %T: %v", obj.Object, obj.Object))) } case K8S_CLUSTER_ROLEBINDING_TYPE: - if !cutil.IsNamespaceScoped() { - if typedRoleBinding, ok := obj.Object.(*rbacv1.ClusterRoleBinding); ok { - newRolebinding := ClusterRolebindingRbacV1{ClusterRolebindingObject: typedRoleBinding} - if newRolebinding.Name() != "" { - glog.V(4).Infof(kwlog(fmt.Sprintf("Found kubernetes cluser rolebinding object %s.", newRolebinding.Name()))) - objMap[K8S_CLUSTER_ROLEBINDING_TYPE] = append(objMap[K8S_CLUSTER_ROLEBINDING_TYPE], newRolebinding) - } else { - return objMap, namespace, fmt.Errorf(kwlog(fmt.Sprintf("Error: rolebinding object must have a name in its metadata section."))) - } + if typedRoleBinding, ok := obj.Object.(*rbacv1.ClusterRoleBinding); ok { + newRolebinding := ClusterRolebindingRbacV1{ClusterRolebindingObject: typedRoleBinding} + if newRolebinding.Name() != "" { + glog.V(4).Infof(kwlog(fmt.Sprintf("Found kubernetes cluser rolebinding object %s.", newRolebinding.Name()))) + objMap[K8S_CLUSTER_ROLEBINDING_TYPE] = append(objMap[K8S_CLUSTER_ROLEBINDING_TYPE], newRolebinding) } else { - return objMap, namespace, fmt.Errorf(kwlog(fmt.Sprintf("Error: rolebinding object has unrecognized type %T: %v", obj.Object, obj.Object))) + return objMap, namespace, fmt.Errorf(kwlog(fmt.Sprintf("Error: rolebinding object must have a name in its metadata section."))) } } else { - glog.Warningf(kwlog(fmt.Sprintf("Ignore cluster rolebinding object because this agent is Namespace-scoped."))) + return objMap, namespace, fmt.Errorf(kwlog(fmt.Sprintf("Error: rolebinding object has unrecognized type %T: %v", obj.Object, obj.Object))) } case K8S_DEPLOYMENT_TYPE: if typedDeployment, ok := obj.Object.(*appsv1.Deployment); ok { @@ -325,27 +317,23 @@ type ClusterRoleRbacV1 struct { } func (cr ClusterRoleRbacV1) Install(c KubeClient, namespace string) error { - if cutil.IsNamespaceScoped() { - 
glog.Warningf(kwlog(fmt.Sprintf("Skip install cluster role because this agent is Namespace-scoped."))) - return nil - } + glog.V(3).Infof(kwlog(fmt.Sprintf("checking if cluster role %s exists...", cr.Name()))) + //if existingCr, _ := c.Client.RbacV1().ClusterRoles().Get(context.Background(), cr.Name(), metav1.GetOptions{}); existingCr != nil { + //glog.Warningf(kwlog(fmt.Sprintf("Skip install cluster role %v because it is already exists.", cr.Name()))) + //} else { glog.V(3).Infof(kwlog(fmt.Sprintf("creating cluster role %v", cr))) _, err := c.Client.RbacV1().ClusterRoles().Create(context.Background(), cr.ClusterRoleObject, metav1.CreateOptions{}) if err != nil && errors.IsAlreadyExists(err) { glog.Warningf(kwlog(fmt.Sprintf("Skip install cluster role because it is already exists."))) - } - if err != nil { + } else if err != nil { return fmt.Errorf(kwlog(fmt.Sprintf("Error creating the cluster role: %v", err))) } + //} return nil } func (cr ClusterRoleRbacV1) Uninstall(c KubeClient, namespace string) { - if cutil.IsNamespaceScoped() { - glog.Warningf(kwlog(fmt.Sprintf("Skip uninstall cluster role because this agent is Namespace-scoped."))) - return - } // delete only if there is no one else using this role // 1. list all clusterrolebinding that associated with this clusterrole, clusterrolebindings in this operator should already being deleted at this point. glog.V(3).Infof(kwlog(fmt.Sprintf("deleting cluster role %s", cr.Name()))) @@ -370,14 +358,12 @@ func (cr ClusterRoleRbacV1) Uninstall(c KubeClient, namespace string) { if stillInUse { glog.V(3).Infof(kwlog(fmt.Sprintf("Skip deleting cluster role %s, it is still in use", cr.Name()))) - } else { err := c.Client.RbacV1().ClusterRoles().Delete(context.Background(), cr.Name(), metav1.DeleteOptions{}) if err != nil { glog.Errorf(kwlog(fmt.Sprintf("unable to delete role %s. 
Error: %v", cr.Name(), err))) } } - } func (cr ClusterRoleRbacV1) Status(c KubeClient, namespace string) (interface{}, error) { @@ -402,42 +388,115 @@ type ClusterRolebindingRbacV1 struct { } func (crb ClusterRolebindingRbacV1) Install(c KubeClient, namespace string) error { - if cutil.IsNamespaceScoped() { - glog.Warningf(kwlog(fmt.Sprintf("Skip install cluster role binding because this agent is Namespace-scoped."))) - return nil - } glog.V(3).Infof(kwlog(fmt.Sprintf("creating cluster role binding %v", crb))) - // Do we need this for cluster role binding?? - subs := []rbacv1.Subject{} - for _, sub := range crb.ClusterRolebindingObject.Subjects { - rb_sub := &sub - if sub.Namespace != "" && sub.Namespace != namespace { - rb_sub.Namespace = namespace - } - subs = append(subs, *rb_sub) - } - crb.ClusterRolebindingObject.Subjects = subs + /* + apiVersion: rbac.authorization.k8s.io/v1 + kind: ClusterRoleBinding + metadata: + labels: + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: nginx-ansible-operator + app.kubernetes.io/instance: manager-rolebinding + app.kubernetes.io/managed-by: kustomize + app.kubernetes.io/name: clusterrolebinding + app.kubernetes.io/part-of: nginx-ansible-operator + name: nginx-ansible-operator-manager-rolebinding + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: nginx-ansible-operator-manager-role + subjects: + - kind: ServiceAccount + name: nginx-ansible-operator-controller-manager + namespace: openhorizon-agent + */ + // checking the serviceaccount for clusterrolebinding if it is namespace-scoped agent: + // - If the namespace of serviceaccount is defined in yaml, but is different from namespace for operator, replace the sa namespace with namespace to deploy operator. 
- _, err := c.Client.RbacV1().ClusterRoleBindings().Create(context.Background(), crb.ClusterRolebindingObject, metav1.CreateOptions{}) - if err != nil && errors.IsAlreadyExists(err) { - glog.Warningf(kwlog(fmt.Sprintf("Skip install cluster role binding because it is already exists."))) - } - if err != nil { + if cutil.IsNamespaceScoped() { + // normalize the namespace of service account for namespace scoped agent + subs := []rbacv1.Subject{} + for _, sub := range crb.ClusterRolebindingObject.Subjects { + rb_sub := &sub + if sub.Namespace != "" && sub.Namespace != namespace { + rb_sub.Namespace = namespace + } + subs = append(subs, *rb_sub) + } + crb.ClusterRolebindingObject.Subjects = subs + } + + // get clusterrolebinding; Get returns a NotFound error when absent, so err == nil means it already exists + existingCRB, err := c.Client.RbacV1().ClusterRoleBindings().Get(context.Background(), crb.Name(), metav1.GetOptions{}) + if err == nil && existingCRB != nil { + glog.Warningf(kwlog(fmt.Sprintf("clusterrolebinding %v exists, updating it...", crb.Name()))) + glog.V(3).Infof(kwlog(fmt.Sprintf("existing clusterrolebinding: %v", existingCRB))) + + // add subjects to the existing CRB + // subs := existingCRB.Subjects + // subs = append(subs, crb.ClusterRolebindingObject.Subjects...) 
+ + // // remove duplicate in the subjects array + // submap := make(map[string]rbacv1.Subject) + // for _, sub := range subs { + // key := fmt.Sprintf("%v/%v/%v", sub.Namespace, sub.Kind, sub.Kind) // key is // + // submap[key] = sub + // } + + // subjectsNoDup := []rbacv1.Subject{} + // for _, sub := range submap { + // subjectsNoDup = append(subjectsNoDup, sub) + // } + + subjectsNoDup := combineRoleBindingSubjects(existingCRB.Subjects, crb.ClusterRolebindingObject.Subjects) + existingCRB.Subjects = subjectsNoDup + glog.V(3).Infof(kwlog(fmt.Sprintf("updating existing clusterrolebinding with merged subjects: %v", existingCRB))) + updatedCRB, err := c.Client.RbacV1().ClusterRoleBindings().Update(context.Background(), existingCRB, metav1.UpdateOptions{}) + glog.V(3).Infof(kwlog(fmt.Sprintf("updated clusterrolebinding: %v", updatedCRB))) + if err != nil { + return fmt.Errorf(kwlog(fmt.Sprintf("Error updating the existing cluster rolebinding: %v", err))) + } + } else if _, err := c.Client.RbacV1().ClusterRoleBindings().Create(context.Background(), crb.ClusterRolebindingObject, metav1.CreateOptions{}); err != nil { return fmt.Errorf(kwlog(fmt.Sprintf("Error creating the cluster rolebinding: %v", err))) } + return nil } func (crb ClusterRolebindingRbacV1) Uninstall(c KubeClient, namespace string) { - if cutil.IsNamespaceScoped() { - glog.Warningf(kwlog(fmt.Sprintf("Skip uninstall cluster role binding because this agent is Namespace-scoped."))) - return - } glog.V(3).Infof(kwlog(fmt.Sprintf("deleting cluster role binding %s", crb.ClusterRolebindingObject.ObjectMeta.Name))) - err := c.Client.RbacV1().ClusterRoleBindings().Delete(context.Background(), crb.ClusterRolebindingObject.ObjectMeta.Name, metav1.DeleteOptions{}) - if err != nil { - glog.Errorf(kwlog(fmt.Sprintf("unable to delete role binding %s. 
Error: %v", crb.ClusterRolebindingObject.ObjectMeta.Name, err))) + if cutil.IsNamespaceScoped() { + // normalize the namespace of service account for namespace scoped agent + subs := []rbacv1.Subject{} + for _, sub := range crb.ClusterRolebindingObject.Subjects { + rb_sub := &sub + if sub.Namespace != "" && sub.Namespace != namespace { + rb_sub.Namespace = namespace + } + subs = append(subs, *rb_sub) + } + crb.ClusterRolebindingObject.Subjects = subs + } + existingCRB, err := c.Client.RbacV1().ClusterRoleBindings().Get(context.Background(), crb.Name(), metav1.GetOptions{}) + if err == nil && existingCRB != nil { + glog.V(3).Infof(kwlog(fmt.Sprintf("existing clusterrolebinding: %v", existingCRB))) + remainSubjects := removeRoleBindingSubjects(existingCRB.Subjects, crb.ClusterRolebindingObject.Subjects) + if len(remainSubjects) == 0 { + glog.V(3).Infof(kwlog(fmt.Sprintf("no subjects remain, deleting clusterrolebinding %v", crb.ClusterRolebindingObject.ObjectMeta.Name))) + err := c.Client.RbacV1().ClusterRoleBindings().Delete(context.Background(), crb.ClusterRolebindingObject.ObjectMeta.Name, metav1.DeleteOptions{}) + if err != nil { + glog.Errorf(kwlog(fmt.Sprintf("unable to delete role binding %s. 
Error: %v", crb.ClusterRolebindingObject.ObjectMeta.Name, err))) + } + } else { + glog.V(3).Infof(kwlog(fmt.Sprintf("assigning remaining subjects %v to existing clusterrolebinding: %v", remainSubjects, existingCRB))) + existingCRB.Subjects = remainSubjects + updatedCRB, err := c.Client.RbacV1().ClusterRoleBindings().Update(context.Background(), existingCRB, metav1.UpdateOptions{}) + glog.V(3).Infof(kwlog(fmt.Sprintf("updated clusterrolebinding: %v", updatedCRB))) + if err != nil { + glog.Errorf(kwlog(fmt.Sprintf("Error deleting subjects in existing clusterrolebinding: %v", err))) + } + } } } @@ -1307,3 +1366,34 @@ func decodeServiceSecret(serviceSecrets map[string]string) (map[string]string, error) { } return decodedSec, nil } + +func combineRoleBindingSubjects(subjects1 []rbacv1.Subject, subjects2 []rbacv1.Subject) []rbacv1.Subject { + subs := subjects1 + subs = append(subs, subjects2...) + + // remove duplicate in the subjects array + submap := make(map[string]rbacv1.Subject) + for _, sub := range subs { + key := fmt.Sprintf("%v/%v/%v", sub.Namespace, sub.Kind, sub.Name) // key is <namespace>/<kind>/<name> + submap[key] = sub + } + + subjectsNoDup := []rbacv1.Subject{} + for _, sub := range submap { + subjectsNoDup = append(subjectsNoDup, sub) + } + + return subjectsNoDup +} + +func removeRoleBindingSubjects(allSubjects []rbacv1.Subject, subjectsToRemove []rbacv1.Subject) []rbacv1.Subject { + for _, subToRemove := range subjectsToRemove { + for i, sub := range allSubjects { + if subToRemove == sub { + allSubjects = append(allSubjects[:i], allSubjects[i+1:]...) + break + } + } + } + return allSubjects +}