Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce the frequency of firewall telemetry #3124

Merged
merged 3 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions azurelinuxagent/common/osutil/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,27 +266,27 @@ def remove_legacy_firewall_rule(self, dst_ip):

def enable_firewall(self, dst_ip, uid):
"""
It checks if every iptable rule exists and add them if not present. It returns a tuple(enable firewall success status, update rules flag)
It checks if every iptable rule exists and add them if not present. It returns a tuple(enable firewall success status, missing rules array)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of returning a boolean indicating that the rules were updated, now this method returns an array containing the rules that were missing when the firewall was updated.

enable firewall success status: Returns True if every firewall rule exists otherwise False
update rules flag: Returns True if rules are updated otherwise False
missing rules: array with names of the missing rules ("ACCEPT DNS", "ACCEPT", "DROP")
"""
# This is to send telemetry when iptable rules updated
is_firewall_rules_updated = False
# If a previous attempt failed, do not retry
global _enable_firewall # pylint: disable=W0603
if not _enable_firewall:
return False, is_firewall_rules_updated
return False, []

missing_rules = []

try:
wait = self.get_firewall_will_wait()

# check every iptable rule and delete others if any rule is missing
# and append every iptable rule to the end of the chain.
try:
if not AddFirewallRules.verify_iptables_rules_exist(wait, dst_ip, uid):
missing_rules.extend(AddFirewallRules.get_missing_iptables_rules(wait, dst_ip, uid))
if len(missing_rules) > 0:
self.remove_firewall(dst_ip, uid, wait)
AddFirewallRules.add_iptables_rules(wait, dst_ip, uid)
is_firewall_rules_updated = True
except CommandError as e:
if e.returncode == 2:
self.remove_firewall(dst_ip, uid, wait)
Expand All @@ -297,14 +297,14 @@ def enable_firewall(self, dst_ip, uid):
logger.warn(ustr(error))
raise

return True, is_firewall_rules_updated
return True, missing_rules

except Exception as e:
_enable_firewall = False
logger.info("Unable to establish firewall -- "
"no further attempts will be made: "
"{0}".format(ustr(e)))
return False, is_firewall_rules_updated
return False, missing_rules

def get_firewall_list(self, wait=None):
try:
Expand Down
18 changes: 13 additions & 5 deletions azurelinuxagent/common/utils/networkutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,14 +217,22 @@ def __execute_check_command(cmd):
return False

@staticmethod
def verify_iptables_rules_exist(wait, dst_ip, uid):
def get_missing_iptables_rules(wait, dst_ip, uid):
missing = []

check_cmd_tcp_rule = AddFirewallRules.get_accept_tcp_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, wait=wait)
check_cmd_accept_rule = AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, uid,
wait=wait)
if not AddFirewallRules.__execute_check_command(check_cmd_tcp_rule):
missing.append("ACCEPT DNS")

check_cmd_accept_rule = AddFirewallRules.get_wire_root_accept_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, uid, wait=wait)
if not AddFirewallRules.__execute_check_command(check_cmd_accept_rule):
missing.append("ACCEPT")

check_cmd_drop_rule = AddFirewallRules.get_wire_non_root_drop_rule(AddFirewallRules.CHECK_COMMAND, dst_ip, wait=wait)
if not AddFirewallRules.__execute_check_command(check_cmd_drop_rule):
missing.append("DROP")

return AddFirewallRules.__execute_check_command(check_cmd_tcp_rule) and AddFirewallRules.__execute_check_command(check_cmd_accept_rule) \
and AddFirewallRules.__execute_check_command(check_cmd_drop_rule)
return missing

@staticmethod
def __execute_firewall_commands(dst_ip, uid, command=APPEND_COMMAND, firewalld_command="", wait=""):
Expand Down
34 changes: 25 additions & 9 deletions azurelinuxagent/ga/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#
# Requires Python 2.6+ and Openssl 1.0+
#

import datetime
import re
import os
import socket
Expand Down Expand Up @@ -105,6 +105,9 @@ def __init__(self, osutil, protocol):
self._protocol = protocol
self._try_remove_legacy_firewall_rule = False
self._is_first_setup = True
self._reset_count = 0
self._report_after = datetime.datetime.min
self._report_period = None # None indicates "report immediately"

def _operation(self):
# If the rules ever change we must reset all rules and start over again.
Expand All @@ -118,19 +121,32 @@ def _operation(self):
self._osutil.remove_legacy_firewall_rule(dst_ip=self._protocol.get_endpoint())
self._try_remove_legacy_firewall_rule = True

firewall_state = self._get_firewall_state()

success, is_firewall_rules_updated = self._osutil.enable_firewall(dst_ip=self._protocol.get_endpoint(), uid=os.getuid())
success, missing_firewall_rules = self._osutil.enable_firewall(dst_ip=self._protocol.get_endpoint(), uid=os.getuid())

if is_firewall_rules_updated:
if len(missing_firewall_rules) > 0:
if self._is_first_setup:
msg = "Created Azure fabric firewall rules:\n{0}".format(self._get_firewall_state())
msg = "Created firewall rules for the Azure Fabric:\n{0}".format(self._get_firewall_state())
logger.info(msg)
add_event(op=WALAEventOperation.Firewall, message=msg)
else:
msg = "Reset Azure fabric firewall rules.\nInitial state:\n{0}\nCurrent state:\n{1}".format(firewall_state, self._get_firewall_state())
logger.info(msg)
add_event(op=WALAEventOperation.ResetFirewall, message=msg)
self._reset_count += 1
# We report immediately (when period is None) the first 5 instances, then we switch the period to every few hours
if self._report_period is None:
msg = "Some firewall rules were missing: {0}. Re-created all the rules:\n{1}".format(missing_firewall_rules, self._get_firewall_state())
if self._reset_count >= 5:
self._report_period = datetime.timedelta(hours=3)
self._reset_count = 0
self._report_after = datetime.datetime.now() + self._report_period
elif datetime.datetime.now() >= self._report_after:
msg = "Some firewall rules were missing: {0}. This has happened {1} time(s) since the last report. Re-created all the rules:\n{2}".format(
missing_firewall_rules, self._reset_count, self._get_firewall_state())
self._reset_count = 0
self._report_after = datetime.datetime.now() + self._report_period
else:
msg = ""
if msg != "":
logger.info(msg)
add_event(op=WALAEventOperation.ResetFirewall, message=msg)

add_periodic(
logger.EVERY_HOUR,
Expand Down
8 changes: 4 additions & 4 deletions tests/common/osutil/test_default.py
Original file line number Diff line number Diff line change
Expand Up @@ -822,13 +822,13 @@ def test_enable_firewall_should_not_use_wait_when_iptables_does_not_support_it(s
success, _ = osutil.DefaultOSUtil().enable_firewall(dst_ip=mock_iptables.destination, uid=mock_iptables.uid)

self.assertTrue(success, "Enabling the firewall was not successful")
# Exactly 8 calls have to be made.
# First check rule, delete 4 rules,
# Exactly 10 calls have to be made.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously verify_iptables_rules_exist (now named get_missing_iptables_rules) would return on the first failure; now it always checks all the rules to find all the missing ones. The call count needs to be updated.

# First check 3 rules, delete 4 rules,
# and Append the IPTable 3 rules.
self.assertEqual(len(mock_iptables.command_calls), 8,
self.assertEqual(len(mock_iptables.command_calls), 10,
"Incorrect number of calls to iptables: [{0}]".format(mock_iptables.command_calls))
for command in mock_iptables.command_calls:
self.assertNotIn("-w", command, "The -w option should have been used in {0}".format(command))
self.assertNotIn("-w", command, "The -w option sh ould have been used in {0}".format(command))

self.assertTrue(osutil._enable_firewall, "The firewall should not have been disabled")

Expand Down
Loading