Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Zhangl/cherrypick dec #4204

Merged
merged 10 commits into from
Dec 13, 2024
Merged
6 changes: 3 additions & 3 deletions .github/workflows/build-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ jobs:
mac-build:
needs: offset-build-number

runs-on: ${{ (matrix.architecture == 'amd64') && 'macos-12' || 'macos-14' }}
runs-on: ${{ (matrix.architecture == 'amd64') && 'macos-13' || 'macos-14' }}

strategy:
matrix:
Expand Down Expand Up @@ -450,7 +450,7 @@ jobs:

# Upload created package to artifacts to be used in next job
- name: Upload MacOS Package to Artifacts
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: macos-${{ matrix.architecture }}-package
path: ${{ env.GOPATH }}/src/github.com/${{ github.repository }}/pkg/mac/build/*.pkg
Expand Down Expand Up @@ -517,7 +517,7 @@ jobs:

# Retrieve artifact from previous job
- name: Download our MacOS Package Artifact
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
name: macos-${{ matrix.architecture }}-package
path: ${{ steps.workdir_setup.outputs.PKG_PATH }}
Expand Down
57 changes: 44 additions & 13 deletions agent-install/agent-install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2056,7 +2056,7 @@ function install_macos() {
fi

if [[ $AGENT_AUTO_UPGRADE != 'true' ]]; then
check_existing_exch_node_is_correct_type "device"
check_existing_exch_node_info "device"
fi

if is_agent_registered && (! is_horizon_defaults_correct || ! is_registration_correct); then
Expand Down Expand Up @@ -2304,7 +2304,7 @@ function install_debian() {
check_and_set_anax_port # sets ANAX_PORT

if [[ $AGENT_AUTO_UPGRADE != 'true' ]]; then
check_existing_exch_node_is_correct_type "device"
check_existing_exch_node_info "device"
fi

if is_agent_registered && (! is_horizon_defaults_correct "$ANAX_PORT" || ! is_registration_correct); then
Expand Down Expand Up @@ -2566,7 +2566,7 @@ function install_redhat() {
if [[ $AGENT_ONLY_CLI != 'true' ]]; then
check_and_set_anax_port # sets ANAX_PORT
if [[ $AGENT_AUTO_UPGRADE != 'true' ]]; then
check_existing_exch_node_is_correct_type "device"
check_existing_exch_node_info "device"
fi

if is_agent_registered && (! is_horizon_defaults_correct "$ANAX_PORT" || ! is_registration_correct); then
Expand Down Expand Up @@ -3392,13 +3392,8 @@ function find_node_ip_address() {
fi
}

# If the node exists in the management hub, verify it is the correct type (device or cluster)
function check_existing_exch_node_is_correct_type() {
log_debug "check_existing_exch_node_is_correct_type() begin"

local expected_type=$1

log_info "Verifying that node $NODE_ID in the exchange is type $expected_type (if it exists)..."
# check the node with $NODE_ID in the exchange, return the output from the exchange
function get_existing_exch_node() {
local exch_creds cert_flag
if [[ -n $HZN_EXCHANGE_USER_AUTH ]]; then exch_creds="$HZN_ORG_ID/$HZN_EXCHANGE_USER_AUTH"
else exch_creds="$HZN_ORG_ID/$HZN_EXCHANGE_NODE_AUTH" # input checking requires either user creds or node creds
Expand All @@ -3407,7 +3402,32 @@ function check_existing_exch_node_is_correct_type() {
if [[ -n $AGENT_CERT_FILE && -f $AGENT_CERT_FILE ]]; then
cert_flag="--cacert $AGENT_CERT_FILE"
fi
local exch_output=$(curl -fsS ${CURL_RETRY_PARMS} $cert_flag $HZN_EXCHANGE_URL/orgs/$HZN_ORG_ID/nodes/$NODE_ID -u "$exch_creds" 2>/dev/null) || true
exch_output=$(curl -fsS ${CURL_RETRY_PARMS} $cert_flag $HZN_EXCHANGE_URL/orgs/$HZN_ORG_ID/nodes/$NODE_ID -u "$exch_creds" 2>/dev/null) || true
echo "$exch_output"
}

# Check whether a node with $NODE_ID already exists in the exchange AND has its
# public (encryption) key set. A set key means the node is active, so continuing
# the install would clobber it; fail fatally (rc 2) in that case.
# Globals read: NODE_ID
# Calls: get_existing_exch_node, log_debug, log_fatal
function check_node_existing_and_active() {
    log_debug "check_node_existing_and_active() begin"
    # Declare and assign separately so the exit status of the command
    # substitution is not masked by 'local'.
    local exch_output
    exch_output=$(get_existing_exch_node)
    if [[ -n "$exch_output" ]]; then
        # Quote the JSON so it is not word-split or globbed before jq sees it.
        local exch_node_public_key
        exch_node_public_key=$(echo "$exch_output" | jq -re '.nodes | .[].publicKey')
        if [[ "$exch_node_public_key" != "" ]]; then
            log_fatal 2 "node $NODE_ID already exists in the exchange and encryption key is set. To continue, use a different node id or delete existing node from the exchange"
        fi
    fi
    log_debug "check_node_existing_and_active() end"
}

# Check if the node exists in the management hub; if so, verify 1) it is the correct type (device or cluster), and 2) for a cluster node, that the namespace matches
function check_existing_exch_node_info() {
log_debug "check_existing_exch_node_info() begin"

local expected_type=$1
local expected_namespace=$2

log_info "Verifying that node $NODE_ID in the exchange is type $expected_type (if it exists)..."
local exch_output=$(get_existing_exch_node)

if [[ -n "$exch_output" ]]; then
local exch_node_type=$(echo $exch_output | jq -re '.nodes | .[].nodeType')
Expand All @@ -3416,9 +3436,17 @@ function check_existing_exch_node_is_correct_type() {
elif [[ "$exch_node_type" == "cluster" ]] && [[ "$expected_type" != "cluster" ]]; then
log_fatal 2 "Node id ${NODE_ID} has already been created as nodeType cluster. Remove the node from the exchange and run this script again."
fi

local exch_node_namespace=$(echo $exch_output | jq -re '.nodes | .[].clusterNamespace')
local exch_node_public_key=$(echo $exch_output | jq -re '.nodes | .[].publicKey')
if [[ "$exch_node_type" == "cluster" ]] && [[ "$exch_node_public_key" != "" ]] && [[ "$expected_namespace" != "$exch_node_namespace" ]]; then
log_fatal 2 "Cluster node: $NODE_ID already exists in namespace $exch_node_namespace. To continue, use a different node id or delete existing node from the exchange"
elif [[ "$exch_node_type" == "cluster" ]] && [[ "$exch_node_public_key" == "" ]]; then
log_info "The node in the exchange ($exch_node_namespace) has empty encryption key, continue on cluster install/update"
fi
fi

log_debug "check_existing_exch_node_is_correct_type() end"
log_debug "check_existing_exch_node_info() end"
}

# make sure the new exchange url and cert are good.
Expand Down Expand Up @@ -4505,7 +4533,7 @@ function install_update_cluster() {

confirmCmds jq

check_existing_exch_node_is_correct_type "cluster"
check_existing_exch_node_info "cluster" $AGENT_NAMESPACE

check_cluster_agent_scope # sets AGENT_DEPLOYMENT_EXIST_IN_SAME_NAMESPACE

Expand Down Expand Up @@ -4551,6 +4579,8 @@ function install_update_cluster() {
function install_cluster() {
log_debug "install_cluster() begin"

check_node_existing_and_active

# generate files based on templates
generate_installation_files

Expand Down Expand Up @@ -4713,3 +4743,4 @@ elif is_cluster; then
else
log_fatal 1 "AGENT_DEPLOY_TYPE must be 'device' or 'cluster'"
fi

58 changes: 27 additions & 31 deletions agent-install/agent-uninstall.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,6 @@ SKIP_DELETE_AGENT_NAMESPACE=false
USE_DELETE_FORCE=false
DELETE_TIMEOUT=10 # Default delete timeout

# Return the current local time as "YYYY-MM-DD HH:MM:SS" (used for log timestamps).
# 'date' already writes to stdout, so the original backtick-plus-echo indirection
# was redundant (and backticks are deprecated in favor of $(...)).
function now() {
    date '+%Y-%m-%d %H:%M:%S'
}

# Exit handling
function quit(){
case $1 in
Expand Down Expand Up @@ -215,7 +211,7 @@ function get_agent_pod_id() {
fi

if [ "$AGENT_POD_READY" == "true" ]; then
POD_ID=$($KUBECTL get pod -n ${AGENT_NAMESPACE} 2> /dev/null | grep "agent-" | cut -d " " -f1 2> /dev/null)
POD_ID=$($KUBECTL get pod -n ${AGENT_NAMESPACE} -l app=agent,type!=auto-upgrade-cronjob 2> /dev/null | grep "agent-" | cut -d " " -f1 2> /dev/null)
if [ -n "${POD_ID}" ]; then
log_info "get pod: ${POD_ID}"
else
Expand All @@ -230,7 +226,7 @@ function removeNodeFromLocalAndManagementHub() {
log_debug "removeNodeFromLocalAndManagementHub() begin"
log_info "Check node status for agent pod: ${POD_ID}"

NODE_INFO=$($KUBECTL exec -it ${POD_ID} -n ${AGENT_NAMESPACE} -- bash -c "hzn node list")
NODE_INFO=$($KUBECTL exec ${POD_ID} -n ${AGENT_NAMESPACE} -c "anax" -- bash -c "hzn node list")
NODE_STATE=$(echo $NODE_INFO | jq -r .configstate.state | sed 's/[^a-z]*//g')
NODE_ID=$(echo $NODE_INFO | jq -r .id | sed 's/\r//g')
log_debug "NODE config state for ${NODE_ID} is ${NODE_STATE}"
Expand Down Expand Up @@ -273,11 +269,11 @@ function unregister() {
fi

set +e
$KUBECTL exec -it ${POD_ID} -n ${AGENT_NAMESPACE} -- bash -c "${HZN_UNREGISTER_CMD}"
$KUBECTL exec ${POD_ID} -n ${AGENT_NAMESPACE} -c "anax" -- bash -c "${HZN_UNREGISTER_CMD}"
set -e

# verify the node is unregistered
NODE_STATE=$($KUBECTL exec -it ${POD_ID} -n ${AGENT_NAMESPACE} -- bash -c "hzn node list | jq -r .configstate.state" | sed 's/[^a-z]*//g')
NODE_STATE=$($KUBECTL exec ${POD_ID} -n ${AGENT_NAMESPACE} -c "anax" -- bash -c "hzn node list | jq -r .configstate.state" | sed 's/[^a-z]*//g')
log_debug "NODE config state is ${NODE_STATE}"

if [[ "$NODE_STATE" != "unconfigured" ]] && [[ "$NODE_STATE" != "unconfiguring" ]]; then
Expand All @@ -287,8 +283,9 @@ function unregister() {
log_debug "unregister() end"
}

# Escape shell-special characters (; $ & | ( )) in HZN_EXCHANGE_USER_AUTH so the
# credentials can be embedded safely in a quoted remote command string.
# The diff residue that left two competing assignments here is collapsed to the
# newer escape set (which also covers parentheses).
# Globals read: HZN_EXCHANGE_USER_AUTH
# Outputs: the escaped credential string on stdout
function getEscapedExchangeUserAuth() {
    # printf instead of echo so a value starting with '-' is not taken as an echo option;
    # declare and assign separately so 'local' does not mask the pipeline's status.
    local escaped_auth
    escaped_auth=$( printf '%s\n' "${HZN_EXCHANGE_USER_AUTH}" | sed 's/;/\\;/g;s/\$/\\$/g;s/\&/\\&/g;s/|/\\|/g;s/(/\\(/g;s/)/\\)/g' )
    printf '%s\n' "${escaped_auth}"
}

Expand All @@ -302,7 +299,7 @@ function deleteNodeFromManagementHub() {
log_info "Deleting node ${node_id} from the management hub..."

set +e
$KUBECTL exec -it ${POD_ID} -n ${AGENT_NAMESPACE} -- bash -c "${EXPORT_EX_USER_AUTH_CMD}; hzn exchange node remove ${node_id} -f"
$KUBECTL exec ${POD_ID} -n ${AGENT_NAMESPACE} -c "anax" -- bash -c "${EXPORT_EX_USER_AUTH_CMD}; hzn exchange node remove ${node_id} -f"
set -e

log_debug "deleteNodeFromManagementHub() end"
Expand All @@ -318,7 +315,7 @@ function verifyNodeRemovedFromManagementHub() {
log_info "Verifying node ${node_id} is removed from the management hub..."

set +e
$KUBECTL exec -it ${POD_ID} -n ${AGENT_NAMESPACE} -- bash -c "${EXPORT_EX_USER_AUTH_CMD}; hzn exchange node list ${node_id}" >/dev/null 2>&1
$KUBECTL exec ${POD_ID} -n ${AGENT_NAMESPACE} -c "anax" -- bash -c "${EXPORT_EX_USER_AUTH_CMD}; hzn exchange node list ${node_id}" >/dev/null 2>&1
if [ $? -ne 8 ]; then
log_warning "Node was not removed from the management hub"
fi
Expand Down Expand Up @@ -347,6 +344,13 @@ function deleteAgentResources() {
$KUBECTL delete deployment $DEPLOYMENT_NAME -n $AGENT_NAMESPACE --force=true --grace-period=0
fi

log_info "Deleting auto-upgrade cronjob..."
if $KUBECTL get cronjob ${CRONJOB_AUTO_UPGRADE_NAME} -n ${AGENT_NAMESPACE} 2>/dev/null; then
$KUBECTL delete cronjob $CRONJOB_AUTO_UPGRADE_NAME -n $AGENT_NAMESPACE
else
log_info "cronjob ${CRONJOB_AUTO_UPGRADE_NAME} does not exist, skip deleting cronjob"
fi

# give pods sometime to terminate by themselves
sleep 10

Expand All @@ -372,31 +376,23 @@ function deleteAgentResources() {
fi

log_info "Deleting configmap..."
$KUBECTL delete configmap $CONFIGMAP_NAME -n $AGENT_NAMESPACE
$KUBECTL delete configmap ${CONFIGMAP_NAME}-backup -n $AGENT_NAMESPACE
$KUBECTL delete configmap $CONFIGMAP_NAME -n $AGENT_NAMESPACE --ignore-not-found
$KUBECTL delete configmap ${CONFIGMAP_NAME}-backup -n $AGENT_NAMESPACE --ignore-not-found

log_info "Deleting secret..."
$KUBECTL delete secret $SECRET_NAME -n $AGENT_NAMESPACE
$KUBECTL delete secret $IMAGE_REGISTRY_SECRET_NAME -n $AGENT_NAMESPACE
$KUBECTL delete secret $IMAGE_PULL_SECRET_NAME -n $AGENT_NAMESPACE
$KUBECTL delete secret ${SECRET_NAME}-backup -n $AGENT_NAMESPACE
set -e

log_info "Deleting auto-upgrade cronjob..."
if $KUBECTL get cronjob ${CRONJOB_AUTO_UPGRADE_NAME} -n ${AGENT_NAMESPACE} 2>/dev/null; then
$KUBECTL delete cronjob $CRONJOB_AUTO_UPGRADE_NAME -n $AGENT_NAMESPACE
else
log_info "cronjob ${CRONJOB_AUTO_UPGRADE_NAME} does not exist, skip deleting cronjob"
fi

set +e
$KUBECTL delete clusterrolebinding ${AGENT_NAMESPACE}-${CLUSTER_ROLE_BINDING_NAME}
$KUBECTL delete secret $SECRET_NAME -n $AGENT_NAMESPACE --ignore-not-found
$KUBECTL delete secret $IMAGE_REGISTRY_SECRET_NAME -n $AGENT_NAMESPACE --ignore-not-found
$KUBECTL delete secret $IMAGE_PULL_SECRET_NAME -n $AGENT_NAMESPACE --ignore-not-found
$KUBECTL delete secret ${SECRET_NAME}-backup -n $AGENT_NAMESPACE --ignore-not-found

log_info "Deleting persistent volume..."
$KUBECTL delete pvc $PVC_NAME -n $AGENT_NAMESPACE
$KUBECTL delete pvc $PVC_NAME -n $AGENT_NAMESPACE --ignore-not-found

log_info "Deleting clusterrolebinding..."
$KUBECTL delete clusterrolebinding ${AGENT_NAMESPACE}-${CLUSTER_ROLE_BINDING_NAME} --ignore-not-found

log_info "Deleting serviceaccount..."
$KUBECTL delete serviceaccount $SERVICE_ACCOUNT_NAME -n $AGENT_NAMESPACE
$KUBECTL delete serviceaccount $SERVICE_ACCOUNT_NAME -n $AGENT_NAMESPACE --ignore-not-found

if [[ "$SKIP_DELETE_AGENT_NAMESPACE" != "true" ]]; then
log_info "Checking deployment and statefulset under namespace $AGENT_NAMESPACE"
Expand All @@ -413,7 +409,7 @@ function deleteAgentResources() {
fi

log_info "Deleting cert file from /etc/default/cert ..."
rm /etc/default/cert/agent-install.crt
rm -f /etc/default/cert/agent-install.crt
set -e

log_debug "deleteAgentResources() end"
Expand Down
9 changes: 6 additions & 3 deletions agreementbot/agreementbot.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ func NewAgreementBotWorker(name string, cfg *config.HorizonConfig, db persistenc
newMessagesToProcess: false,
nodeSearch: NewNodeSearch(),
secretProvider: s,
secretUpdateManager: NewSecretUpdateManager(),
secretUpdateManager: NewSecretUpdateManager(cfg.AgreementBot.SecretsUpdateCheckInterval, cfg.AgreementBot.SecretsUpdateCheckInterval, cfg.AgreementBot.SecretsUpdateCheckMaxInterval, cfg.AgreementBot.SecretsUpdateCheckIncrement),
}

patternManager = NewPatternManager()
Expand Down Expand Up @@ -1634,7 +1634,7 @@ func (w *AgreementBotWorker) secretsProviderMaintenance() int {

// This function is called by the secrets update sub worker to learn about secrets that have been updated.
func (w *AgreementBotWorker) secretsUpdate() int {

nextRunWait := w.secretUpdateManager.PollInterval
secretUpdates, err := w.secretUpdateManager.CheckForUpdates(w.secretProvider, w.db)
if err != nil {
glog.Errorf(AWlogString(err))
Expand All @@ -1643,10 +1643,13 @@ func (w *AgreementBotWorker) secretsUpdate() int {
// Send out an event with the changed secrets and affected policies in it.
if secretUpdates != nil && secretUpdates.Length() != 0 {
w.Messages() <- events.NewSecretUpdatesMessage(events.UPDATED_SECRETS, secretUpdates)
nextRunWait = w.secretUpdateManager.AdjustSecretsPollingInterval(secretUpdates.Length())
} else {
nextRunWait = w.secretUpdateManager.AdjustSecretsPollingInterval(0)
}
}

return 0
return nextRunWait
}

func (w *AgreementBotWorker) monitorHAGroupNMPUpdates() int {
Expand Down
31 changes: 25 additions & 6 deletions agreementbot/consumer_protocol_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -392,18 +392,29 @@ func (b *BaseConsumerProtocolHandler) HandlePolicyChangeForAgreement(ag persiste
glog.Infof(BCPHlogstring(b.Name(), fmt.Sprintf("attempting to update agreement %v due to change in policy", ag.CurrentAgreementId)))
}

msgPrinter := i18n.GetMessagePrinter()

svcAllPol := externalpolicy.ExternalPolicy{}
svcPolicyHandler := exchange.GetHTTPServicePolicyHandler(b)
svcResolveHandler := exchange.GetHTTPServiceDefResolverHandler(b)

for _, svcId := range ag.ServiceId {
if svcPol, err := exchange.GetServicePolicyWithId(b, svcId); err != nil {
glog.Errorf(BCPHlogstring(b.Name(), fmt.Sprintf("failed to get service policy for %v from the exchange: %v", svcId, err)))
if svcDef, err := exchange.GetServiceWithId(b, svcId); err != nil {
glog.Errorf(BCPHlogstring(b.Name(), fmt.Sprintf("failed to get service %v, error: %v", svcId, err)))
return false, false, false
} else if svcPol != nil {
svcAllPol.MergeWith(&svcPol.ExternalPolicy, false)
} else if svcDef != nil {
if mergedSvcPol, _, _, _, _, err := compcheck.GetServicePolicyWithDefaultProperties(svcPolicyHandler, svcResolveHandler, svcDef.URL, exchange.GetOrg(svcId), svcDef.Version, svcDef.Arch, msgPrinter); err != nil {
glog.Errorf(BCPHlogstring(b.Name(), fmt.Sprintf("failed to get merged service policy for %v, error: %v", svcId, err)))
return false, false, false
} else if mergedSvcPol != nil {
svcAllPol.MergeWith(mergedSvcPol, false)
}
}
}

msgPrinter := i18n.GetMessagePrinter()
if glog.V(5) {
glog.Infof(BCPHlogstring(b.Name(), fmt.Sprintf("For agreement %v merged svc policy is %v", ag.CurrentAgreementId, svcAllPol)))
}

busPolHandler := exchange.GetHTTPBusinessPoliciesHandler(b)
_, busPol, err := compcheck.GetBusinessPolicy(busPolHandler, ag.PolicyName, true, msgPrinter)
Expand Down Expand Up @@ -510,7 +521,7 @@ func (b *BaseConsumerProtocolHandler) HandlePolicyChangeForAgreement(ag persiste
}
return true, true, false
}
// new cluster namespace is still compatible
// cluster namespace remains the same
}
}

Expand All @@ -535,6 +546,13 @@ func (b *BaseConsumerProtocolHandler) HandlePolicyChangeForAgreement(ag persiste
}
}

if same, msg := consumerPol.IsSamePolicy(oldPolicy); same {
glog.V(3).Infof("business policy(producerPol) %v content remains same with old policy; no update to agreement %s", ag.PolicyName, ag.CurrentAgreementId)
return true, true, true
} else {
glog.V(3).Infof("business policy %v content is changed in agreement %v: %v", ag.PolicyName, ag.CurrentAgreementId, msg)
}

newTsCs, err := policy.Create_Terms_And_Conditions(producerPol, consumerPol, wl, ag.CurrentAgreementId, b.config.AgreementBot.DefaultWorkloadPW, b.config.AgreementBot.NoDataIntervalS, basicprotocol.PROTOCOL_CURRENT_VERSION)
if err != nil {
glog.Errorf(BCPHlogstring(b.Name(), fmt.Sprintf("error creating new terms and conditions: %v", err)))
Expand All @@ -543,6 +561,7 @@ func (b *BaseConsumerProtocolHandler) HandlePolicyChangeForAgreement(ag persiste

ag.LastPolicyUpdateTime = uint64(time.Now().Unix())

// this function will send out "basicagreementupdate"
b.UpdateAgreement(&ag, basicprotocol.MsgUpdateTypePolicyChange, newTsCs, cph)

return true, true, true
Expand Down
Loading