From ccc895b4c69f96e404f7f90f6c6a82af17024b9c Mon Sep 17 00:00:00 2001 From: Kristoffer Dalby Date: Mon, 16 Dec 2024 11:26:56 +0100 Subject: [PATCH] fixes to extra-record file watcher (#2298) * Fix excess error message during writes Fixes #2290 Signed-off-by: Kristoffer Dalby * retry filewatcher on removed files This should handled if files are deleted and added again, and for rename scenarios. Fixes #2289 Signed-off-by: Kristoffer Dalby * test more write and remove in filewatcher Signed-off-by: Kristoffer Dalby --------- Signed-off-by: Kristoffer Dalby --- hscontrol/dns/extrarecords.go | 49 ++++++++++++++++++--- integration/dns_test.go | 82 ++++++++++++++++++++++++++++++++--- 2 files changed, 120 insertions(+), 11 deletions(-) diff --git a/hscontrol/dns/extrarecords.go b/hscontrol/dns/extrarecords.go index 73f646ba98..e667c56208 100644 --- a/hscontrol/dns/extrarecords.go +++ b/hscontrol/dns/extrarecords.go @@ -7,6 +7,7 @@ import ( "os" "sync" + "github.com/cenkalti/backoff/v4" "github.com/fsnotify/fsnotify" "github.com/rs/zerolog/log" "tailscale.com/tailcfg" @@ -83,12 +84,39 @@ func (e *ExtraRecordsMan) Run() { log.Error().Caller().Msgf("file watcher event channel closing") return } - - log.Trace().Caller().Str("path", event.Name).Str("op", event.Op.String()).Msg("extra records received filewatch event") - if event.Name != e.path { - continue + switch event.Op { + case fsnotify.Create, fsnotify.Write, fsnotify.Chmod: + log.Trace().Caller().Str("path", event.Name).Str("op", event.Op.String()).Msg("extra records received filewatch event") + if event.Name != e.path { + continue + } + e.updateRecords() + + // If a file is removed or renamed, fsnotify will loose track of it + // and not watch it. We will therefore attempt to re-add it with a backoff. + case fsnotify.Remove, fsnotify.Rename: + err := backoff.Retry(func() error { + if _, err := os.Stat(e.path); err != nil { + return err + } + + return nil + }, backoff.NewExponentialBackOff()) + + if err != nil { + log.Error().Caller().Err(err).Msgf("extra records filewatcher retrying to find file after delete") + continue + } + + err = e.watcher.Add(e.path) + if err != nil { + log.Error().Caller().Err(err).Msgf("extra records filewatcher re-adding file after delete failed, giving up.") + return + } else { + log.Trace().Caller().Str("path", e.path).Msg("extra records file re-added after delete") + e.updateRecords() + } } - e.updateRecords() case err, ok := <-e.watcher.Errors: if !ok { @@ -116,6 +144,11 @@ func (e *ExtraRecordsMan) updateRecords() { return } + // If there are no records, ignore the update. + if records == nil { + return + } + e.mu.Lock() defer e.mu.Unlock() @@ -143,6 +176,12 @@ func readExtraRecordsFromPath(path string) ([]tailcfg.DNSRecord, [32]byte, error return nil, [32]byte{}, fmt.Errorf("reading path: %s, err: %w", path, err) } + // If the read was triggered too fast, and the file is not complete, ignore the update + // if the file is empty. A consecutive update will be triggered when the file is complete. + if len(b) == 0 { + return nil, [32]byte{}, nil + } + var records []tailcfg.DNSRecord err = json.Unmarshal(b, &records) if err != nil { diff --git a/integration/dns_test.go b/integration/dns_test.go index 7ae1c82bf6..d1693441b6 100644 --- a/integration/dns_test.go +++ b/integration/dns_test.go @@ -146,6 +146,27 @@ func TestResolveMagicDNSExtraRecordsPath(t *testing.T) { assertCommandOutputContains(t, client, []string{"dig", "test.myvpn.example.com"}, "6.6.6.6") } + hs, err := scenario.Headscale() + assertNoErr(t, err) + + // Write the file directly into place from the docker API. + b0, _ := json.Marshal([]tailcfg.DNSRecord{ + { + Name: "docker.myvpn.example.com", + Type: "A", + Value: "2.2.2.2", + }, + }) + + err = hs.WriteFile(erPath, b0) + assertNoErr(t, err) + + for _, client := range allClients { + assertCommandOutputContains(t, client, []string{"dig", "docker.myvpn.example.com"}, "2.2.2.2") + } + + // Write a new file and move it to the path to ensure the reload + // works when a file is moved atomically into place. extraRecords = append(extraRecords, tailcfg.DNSRecord{ Name: "otherrecord.myvpn.example.com", Type: "A", @@ -153,12 +174,6 @@ func TestResolveMagicDNSExtraRecordsPath(t *testing.T) { }) b2, _ := json.Marshal(extraRecords) - hs, err := scenario.Headscale() - assertNoErr(t, err) - - // Write it to a separate file to ensure Docker's API doesnt - // do anything unexpected and rather move it into place to trigger - // a reload. err = hs.WriteFile(erPath+"2", b2) assertNoErr(t, err) _, err = hs.Execute([]string{"mv", erPath + "2", erPath}) @@ -168,6 +183,61 @@ func TestResolveMagicDNSExtraRecordsPath(t *testing.T) { assertCommandOutputContains(t, client, []string{"dig", "test.myvpn.example.com"}, "6.6.6.6") assertCommandOutputContains(t, client, []string{"dig", "otherrecord.myvpn.example.com"}, "7.7.7.7") } + + // Write a new file and copy it to the path to ensure the reload + // works when a file is copied into place. + b3, _ := json.Marshal([]tailcfg.DNSRecord{ + { + Name: "copy.myvpn.example.com", + Type: "A", + Value: "8.8.8.8", + }, + }) + + err = hs.WriteFile(erPath+"3", b3) + assertNoErr(t, err) + _, err = hs.Execute([]string{"cp", erPath + "3", erPath}) + assertNoErr(t, err) + + for _, client := range allClients { + assertCommandOutputContains(t, client, []string{"dig", "copy.myvpn.example.com"}, "8.8.8.8") + } + + // Write in place to ensure pipe like behaviour works + b4, _ := json.Marshal([]tailcfg.DNSRecord{ + { + Name: "docker.myvpn.example.com", + Type: "A", + Value: "9.9.9.9", + }, + }) + command := []string{"echo", fmt.Sprintf("'%s'", string(b4)), ">", erPath} + _, err = hs.Execute([]string{"bash", "-c", strings.Join(command, " ")}) + assertNoErr(t, err) + + for _, client := range allClients { + assertCommandOutputContains(t, client, []string{"dig", "docker.myvpn.example.com"}, "9.9.9.9") + } + + // Delete the file and create a new one to ensure it is picked up again. + _, err = hs.Execute([]string{"rm", erPath}) + assertNoErr(t, err) + + time.Sleep(2 * time.Second) + + // The same paths should still be available as it is not cleared on delete. + for _, client := range allClients { + assertCommandOutputContains(t, client, []string{"dig", "docker.myvpn.example.com"}, "9.9.9.9") + } + + // Write a new file, the backoff mechanism should make the filewatcher pick it up + // again. + err = hs.WriteFile(erPath, b3) + assertNoErr(t, err) + + for _, client := range allClients { + assertCommandOutputContains(t, client, []string{"dig", "copy.myvpn.example.com"}, "8.8.8.8") + } } // TestValidateResolvConf validates that the resolv.conf file