From 015d1bfefe750693c9dfa575e610c9ec8ca9f321 Mon Sep 17 00:00:00 2001 From: rayluoluo Date: Sat, 17 Aug 2024 21:16:08 +0800 Subject: [PATCH 1/2] [improve][broker]Change the log level to reduce repeated error logs --- .../pulsar/broker/loadbalance/LinuxInfoUtils.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/LinuxInfoUtils.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/LinuxInfoUtils.java index 9cf861a8e85cf..ad274b1bfbdf8 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/LinuxInfoUtils.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/LinuxInfoUtils.java @@ -27,9 +27,11 @@ import java.nio.file.Paths; import java.util.Arrays; import java.util.Collections; +import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; import java.util.stream.Stream; import lombok.AllArgsConstructor; @@ -60,6 +62,7 @@ public class LinuxInfoUtils { private static Method getCpuQuotaMethod; private static Method getCpuPeriodMethod; private static Method getCpuUsageMethod; + private static Set exposeNicLimitFaultCompleted = new HashSet<>(); static { try { @@ -251,7 +254,15 @@ public static double getTotalNicLimit(List nics, BitRateUnit bitRateUnit try { return readDoubleFromFile(getReplacedNICPath(NIC_SPEED_TEMPLATE, nicPath)); } catch (IOException e) { - log.error("[LinuxInfo] Failed to get total nic limit.", e); + if (!exposeNicLimitFaultCompleted.contains(nicPath)) { + log.error("[LinuxInfo] Failed to get the nic limit of {}.", nicPath, e); + // logs that fail to read the nic limit are printed at the ERROR level only for the first time + exposeNicLimitFaultCompleted.add(nicPath); + } else { + if (log.isDebugEnabled()) { + log.debug("[LinuxInfo] Failed to get the nic limit of {}.", nicPath, e); + } + } return 
0d; } }).sum(), BitRateUnit.Megabit); From 8ed80db3f01a23b0aedf36dfcb155b70a9135cee Mon Sep 17 00:00:00 2001 From: rayluoluo Date: Tue, 27 Aug 2024 14:24:36 +0800 Subject: [PATCH 2/2] [improve][broker]Change the log level to reduce repeated error logs #23192 --- .../pulsar/broker/loadbalance/LinuxInfoUtils.java | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/LinuxInfoUtils.java b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/LinuxInfoUtils.java index ad274b1bfbdf8..b63f0fe85b20c 100644 --- a/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/LinuxInfoUtils.java +++ b/pulsar-broker/src/main/java/org/apache/pulsar/broker/loadbalance/LinuxInfoUtils.java @@ -27,11 +27,10 @@ import java.nio.file.Paths; import java.util.Arrays; import java.util.Collections; -import java.util.HashSet; import java.util.List; import java.util.Locale; import java.util.Optional; -import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; import java.util.stream.Stream; import lombok.AllArgsConstructor; @@ -56,13 +55,13 @@ public class LinuxInfoUtils { // NIC type private static final int ARPHRD_ETHER = 1; private static final String NIC_SPEED_TEMPLATE = "/sys/class/net/%s/speed"; - + private static final long errLogPrintedFrequencyInReadingNicLimits = 1000; + private static final AtomicLong failedCounterInReadingNicLimits = new AtomicLong(0); private static Object /*jdk.internal.platform.Metrics*/ metrics; private static Method getMetricsProviderMethod; private static Method getCpuQuotaMethod; private static Method getCpuPeriodMethod; private static Method getCpuUsageMethod; - private static Set exposeNicLimitFaultCompleted = new HashSet<>(); static { try { @@ -254,10 +253,10 @@ public static double getTotalNicLimit(List nics, BitRateUnit bitRateUnit try { return 
readDoubleFromFile(getReplacedNICPath(NIC_SPEED_TEMPLATE, nicPath)); } catch (IOException e) { - if (!exposeNicLimitFaultCompleted.contains(nicPath)) { + // To avoid flooding the log, a failure to read a NIC speed limit is logged at ERROR level only once per + // errLogPrintedFrequencyInReadingNicLimits failures; all other failures are logged at DEBUG level + if (failedCounterInReadingNicLimits.getAndIncrement() % errLogPrintedFrequencyInReadingNicLimits == 0) { log.error("[LinuxInfo] Failed to get the nic limit of {}.", nicPath, e); - // logs that fail to read the nic limit are printed at the ERROR level only for the first time - exposeNicLimitFaultCompleted.add(nicPath); } else { if (log.isDebugEnabled()) { log.debug("[LinuxInfo] Failed to get the nic limit of {}.", nicPath, e);