diff --git a/felix/dataplane/windows/endpoint_mgr.go b/felix/dataplane/windows/endpoint_mgr.go index 3839acf4b60..4f8bdae0fb8 100644 --- a/felix/dataplane/windows/endpoint_mgr.go +++ b/felix/dataplane/windows/endpoint_mgr.go @@ -188,14 +188,20 @@ func (m *endpointManager) RefreshHnsEndpointCache(forceRefresh bool) error { continue } - // An endpoint is considered to be active if its state is Attached or AttachedSharing. - // Note: Endpoint.State attribute is dependent on HNS v1 api. If hcsshim upgrades to HNS v2 - // api this will break. We then need to Reach out to Microsoft to facilate the change via HNS. - if endpoint.State.String() != "Attached" && endpoint.State.String() != "AttachedSharing" { + // Some CNI plugins do not clear endpoint properly when a pod has been torn down. + // In that case, it is possible Felix sees multiple endpoints with the same IP. + // We need to filter out inactive endpoints that do not attach to any container. + if len(endpoint.SharedContainers) == 0 { log.WithFields(log.Fields{ "id": endpoint.Id, "name": endpoint.Name, }).Warn("This is a stale endpoint with no container attached") + log.WithFields(log.Fields{ + "id": endpoint.Id, + "name": endpoint.Name, + "state": endpoint.State.String(), + "sharedcontainers": endpoint.SharedContainers, + }).Debug("Stale endpoint debug information") continue } ip := endpoint.IPAddress.String() + ipv4AddrSuffix