From 7863f8b77919052190e7b4c8431031f806999fe1 Mon Sep 17 00:00:00 2001 From: Angus Lees Date: Fri, 7 Aug 2020 15:35:18 +1000 Subject: [PATCH] Improve CRI->checkpoint logic in the face of downgrades Commit e38a4d8a8a9358f9b1e24f465a8d42a95a837533 introduced a method of checkpointing IPAM state to disk and avoiding CRI access. As written, that commit failed to handle upgrade/downgrade/upgrade: on the second upgrade the checkpoint file would exist but contain stale information. This change improves the logic to use a more robust "expand/contract" rollout: Migration phase0 (CNI 1.6): Read/write[*] from CRI only. Migration phase1 (CNI 1.7): Read from CRI. Write to CRI+file. Migration phase2 (CNI 1.8?): Read from file. Write to CRI+file. Migration phase3 (hypothetical): Read/write from file only. This (and the previous) commit implements phase1. [*] There is no "write" to CRI directly, but the logic assumes it happens indirectly as a consequence of a successful CNI add/delete operation. Phase2 and phase3 are thus equivalent in this migration. Note this supports upgrades/downgrades across a single version only (without a node reboot). --- pkg/ipamd/datastore/data_store.go | 107 ++++++++++++++++++++---------- pkg/ipamd/ipamd_test.go | 5 +- 2 files changed, 77 insertions(+), 35 deletions(-) diff --git a/pkg/ipamd/datastore/data_store.go b/pkg/ipamd/datastore/data_store.go index 0d26622c1c..b0655416c4 100644 --- a/pkg/ipamd/datastore/data_store.go +++ b/pkg/ipamd/datastore/data_store.go @@ -52,10 +52,28 @@ const ( UnknownENIError = "datastore: unknown ENI" ) -// Enable temporary checkpoint upgrade logic. If checkpoint file is -// not found, query CRI to learn current pods and currently-allocated -// IPs. Will be disabled/removed after next release. -const BackfillMissingCheckpointFromCRI = true +// We need to know which IPs are already allocated across +// ipamd/datastore restarts. In vpc-cni <=1.6, we "stored" the +// allocated IPs by querying kubelet's CRI. Since this requires scary +// access to CRI socket, and may race with CRI's internal logic, we +// are transitioning away from from this to storing allocations +// ourself in a file (similar to host-ipam CNI plugin). +// +// Because we don't want to require a node restart during CNI +// upgrade/downgrade, we need an "expand/contract" style upgrade to +// keep the two stores in sync: +// +// Migration phase0 (CNI 1.6): Read/write from CRI only. +// Migration phase1 (CNI 1.7): Read from CRI. Write to CRI+file. +// Migration phase2 (CNI 1.8?): Read from file. Write to CRI+file. +// Migration phase3 (hypothetical): Read/write from file only. +// +// Note phase3 is not necessary since writes to CRI are implicit. +// At/after phase2, we can remove any code protected by +// CheckpointMigrationPhase<2. +const CheckpointMigrationPhase = 1 + +// Placeholders used for unknown values when reading from CRI. const BackfillNetworkName = "_migrated-from-cri" const BackfillNetworkIface = "unknown" @@ -199,13 +217,14 @@ type PodIPInfo struct { // DataStore contains node level ENI/IP type DataStore struct { - total int - assigned int - eniPool ENIPool - lock sync.Mutex - log logger.Logger - backingStore Checkpointer - cri cri.APIs + total int + assigned int + eniPool ENIPool + lock sync.Mutex + log logger.Logger + CheckpointMigrationPhase int + backingStore Checkpointer + cri cri.APIs } // ENIInfos contains ENI IP information @@ -233,10 +252,11 @@ func prometheusRegister() { func NewDataStore(log logger.Logger, backingStore Checkpointer) *DataStore { prometheusRegister() return &DataStore{ - eniPool: make(ENIPool), - log: log, - backingStore: backingStore, - cri: cri.New(), + eniPool: make(ENIPool), + log: log, + backingStore: backingStore, + cri: cri.New(), + CheckpointMigrationPhase: CheckpointMigrationPhase, } } @@ -262,13 +282,13 @@ type CheckpointEntry struct { // configured backing store. Should be called before using data // store. func (ds *DataStore) ReadBackingStore() error { - ds.log.Infof("Reading ipam state from backing store") - var data CheckpointData - err := ds.backingStore.Restore(&data) - ds.log.Debugf("backing store restore returned err %v", err) - if BackfillMissingCheckpointFromCRI && os.IsNotExist(err) { - ds.log.Infof("Backing store not found. Querying CRI.") + + switch ds.CheckpointMigrationPhase { + case 1: + // Phase1: Read from CRI + ds.log.Infof("Reading ipam state from CRI") + sandboxes, err := ds.cri.GetRunningPodSandboxes(ds.log) if err != nil { return err @@ -291,18 +311,28 @@ func (ds *DataStore) ReadBackingStore() error { Allocations: entries, } - } else if os.IsNotExist(err) { - // Assume that no file == no containers are - // currently in use, eg a fresh reboot just - // cleared everything out. This is ok, and a - // no-op. - return nil - } else if err != nil { - return fmt.Errorf("datastore: error reading backing store: %v", err) - } + case 2: + // Phase2: Read from checkpoint file + ds.log.Infof("Reading ipam state from backing store") + + err := ds.backingStore.Restore(&data) + ds.log.Debugf("backing store restore returned err %v", err) + if os.IsNotExist(err) { + // Assume that no file == no containers are + // currently in use, eg a fresh reboot just + // cleared everything out. This is ok, and a + // no-op. + return nil + } else if err != nil { + return fmt.Errorf("datastore: error reading backing store: %v", err) + } - if data.Version != CheckpointFormatVersion { - return fmt.Errorf("datastore: unknown backing store format (%s != %s) - wrong CNI/ipamd version? (Rebooting this node will restart local pods and probably help)", data.Version, CheckpointFormatVersion) + if data.Version != CheckpointFormatVersion { + return fmt.Errorf("datastore: unknown backing store format (%s != %s) - wrong CNI/ipamd version? (Rebooting this node will restart local pods and probably help)", data.Version, CheckpointFormatVersion) + } + + default: + panic(fmt.Sprintf("Unexpected value of CheckpointMigrationPhase: %v", ds.CheckpointMigrationPhase)) } ds.lock.Lock() @@ -327,7 +357,7 @@ func (ds *DataStore) ReadBackingStore() error { ds.log.Debugf("Recovered %s => %s/%s", allocation.IPAMKey, eni.ID, addr.Address) } - ds.log.Debugf("Completed reading ipam state from backing store") + ds.log.Debugf("Completed ipam state recovery") return nil } @@ -682,7 +712,16 @@ func (ds *DataStore) UnassignPodIPv4Address(ipamKey IPAMKey) (ip string, deviceN eni, addr := ds.eniPool.FindAddressForSandbox(ipamKey) - if BackfillMissingCheckpointFromCRI && addr == nil { + if addr == nil { + // This `if` block should be removed when the CRI + // migration code is finally removed. Leaving a + // compile dependency here to make that obvious :P + var _ = CheckpointMigrationPhase + + // If the entry was discovered by querying CRI at + // restart rather than by observing an ADD operation + // directly, then we won't have captured the true + // networkname/ifname. ds.log.Debugf("UnassignPodIPv4Address: Failed to find IPAM entry under full key, trying CRI-migrated version") ipamKey.NetworkName = BackfillNetworkName ipamKey.IfName = BackfillNetworkIface diff --git a/pkg/ipamd/ipamd_test.go b/pkg/ipamd/ipamd_test.go index 3b54cc6de2..31f7c555d8 100644 --- a/pkg/ipamd/ipamd_test.go +++ b/pkg/ipamd/ipamd_test.go @@ -92,6 +92,7 @@ func TestNodeInit(t *testing.T) { networkClient: m.network, dataStore: datastore.NewDataStore(log, datastore.NewTestCheckpoint(fakeCheckpoint)), } + mockContext.dataStore.CheckpointMigrationPhase = 2 eni1, eni2 := getDummyENIMetadata() @@ -503,7 +504,9 @@ func TestIPAMContext_nodeIPPoolTooLow(t *testing.T) { } func testDatastore() *datastore.DataStore { - return datastore.NewDataStore(log, datastore.NewTestCheckpoint(datastore.CheckpointData{Version: datastore.CheckpointFormatVersion})) + ds := datastore.NewDataStore(log, datastore.NewTestCheckpoint(datastore.CheckpointData{Version: datastore.CheckpointFormatVersion})) + ds.CheckpointMigrationPhase = 2 + return ds } func datastoreWith3FreeIPs() *datastore.DataStore {