Skip to content

Commit

Permalink
corrects open file metric
Browse files Browse the repository at this point in the history
The metrics for computing the current number of open files on a tablet
server was incorrect over time.  Used the semaphor limiting open files
to compute the number of open files.   The removes the need to correctly
increment and decrement files opened which had a bug.
  • Loading branch information
keith-turner committed Oct 22, 2024
1 parent 68cb5aa commit 47c88d4
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -593,4 +593,8 @@ public synchronized int getNumOpenFiles() {
public ScanFileManager newScanFileManager(KeyExtent tablet, CacheProvider cacheProvider) {
return new ScanFileManager(tablet, cacheProvider);
}

public int getOpenFiles() {
return maxOpen - filePermits.availablePermits();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ public void run() {
metricsInfo.addServiceTags(getApplicationName(), clientAddress);
metricsInfo.addCommonTags(List.of(Tag.of("resource.group", groupName)));

scanMetrics = new TabletServerScanMetrics();
scanMetrics = new TabletServerScanMetrics(resourceManager::getOpenFiles);
sessionManager.setZombieCountConsumer(scanMetrics::setZombieScanThreads);
scanServerMetrics = new ScanServerMetrics(tabletMetadataCache);
blockCacheMetrics = new BlockCacheMetrics(resourceManager.getIndexCache(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -765,7 +765,7 @@ public void run() {

metrics = new TabletServerMetrics(this);
updateMetrics = new TabletServerUpdateMetrics();
scanMetrics = new TabletServerScanMetrics();
scanMetrics = new TabletServerScanMetrics(this.resourceManager::getOpenFiles);
sessionManager.setZombieCountConsumer(scanMetrics::setZombieScanThreads);
mincMetrics = new TabletServerMinCMetrics();
ceMetrics = new CompactionExecutorsMetrics();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,10 @@ public TabletServerResourceManager(ServerContext context, TabletHostingServer ts
new AssignmentWatcher(acuConf, context, activeAssignments), 5000, TimeUnit.MILLISECONDS));
}

public int getOpenFiles() {
return fileManager.getOpenFiles();
}

/**
* Accepts some map which is tracking active assignment task(s) (running) and monitors them to
* ensure that the time the assignment(s) have been running don't exceed a threshold. If the time
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
package org.apache.accumulo.tserver.metrics;

import java.time.Duration;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;
import java.util.function.IntSupplier;

import org.apache.accumulo.core.metrics.MetricsProducer;
import org.apache.accumulo.server.metrics.NoopMetrics;
Expand All @@ -34,7 +34,7 @@

public class TabletServerScanMetrics implements MetricsProducer {

private final AtomicInteger openFiles = new AtomicInteger(0);
private final IntSupplier openFiles;
private Timer scans = NoopMetrics.useNoopTimer();
private DistributionSummary resultsPerScan = NoopMetrics.useNoopDistributionSummary();
private DistributionSummary yields = NoopMetrics.useNoopDistributionSummary();
Expand Down Expand Up @@ -96,14 +96,6 @@ public void addYield(long value) {
yields.record(value);
}

public void incrementOpenFiles(int delta) {
openFiles.addAndGet(Math.max(0, delta));
}

public void decrementOpenFiles(int delta) {
openFiles.addAndGet(delta < 0 ? delta : delta * -1);
}

public void incrementStartScan(double value) {
startScanCalls.increment(value);
}
Expand All @@ -128,9 +120,13 @@ public long getZombieThreadsCount() {
return zombieScanThreads.get();
}

public TabletServerScanMetrics(IntSupplier openFileSupplier) {
openFiles = openFileSupplier;
}

@Override
public void registerMetrics(MeterRegistry registry) {
Gauge.builder(METRICS_SCAN_OPEN_FILES, openFiles::get)
Gauge.builder(METRICS_SCAN_OPEN_FILES, openFiles::getAsInt)
.description("Number of files open for scans").register(registry);
scans = Timer.builder(METRICS_SCAN_TIMES).description("Scans").register(registry);
resultsPerScan = DistributionSummary.builder(METRICS_SCAN_RESULTS)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ public DataSource getNewDataSource() {
} finally {
try {
if (fileManager != null) {
tablet.getScanMetrics().decrementOpenFiles(fileManager.getNumOpenFiles());
fileManager.releaseOpenFiles(false);
}
} catch (Exception e) {
Expand Down Expand Up @@ -154,7 +153,6 @@ private SortedKeyValueIterator<Key,Value> createIterator() throws IOException {
// only acquire the file manager when we know the tablet is open
if (fileManager == null) {
fileManager = tablet.getTabletResources().newScanFileManager(scanParams.getScanDispatch());
tablet.getScanMetrics().incrementOpenFiles(fileManager.getNumOpenFiles());
log.trace("Adding active scan for {}, scanId:{}", tablet.getExtent(), scanDataSourceId);
tablet.addActiveScans(this);
}
Expand Down Expand Up @@ -274,7 +272,6 @@ public void close(boolean sawErrors) {
}
try {
if (fileManager != null) {
tablet.getScanMetrics().decrementOpenFiles(fileManager.getNumOpenFiles());
fileManager.releaseOpenFiles(sawErrors);
}
} finally {
Expand Down

0 comments on commit 47c88d4

Please sign in to comment.