diff --git a/benchmark/scripts/compare_perf_tests.py b/benchmark/scripts/compare_perf_tests.py index f5c200dc7d6d7..017ba24c10229 100755 --- a/benchmark/scripts/compare_perf_tests.py +++ b/benchmark/scripts/compare_perf_tests.py @@ -229,7 +229,8 @@ class PerformanceTestResult(object): `--quantile`parameter. In both cases, the last column, MAX_RSS is optional. """ - def __init__(self, csv_row, quantiles=False, memory=False, delta=False): + def __init__(self, csv_row, quantiles=False, memory=False, delta=False, + meta=False): """Initialize from a row of multiple columns with benchmark summary. The row is an iterable, such as a row provided by the CSV parser. @@ -239,7 +240,8 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False): self.num_samples = int(csv_row[2]) # Number of measurements taken if quantiles: # Variable number of columns representing quantiles - runtimes = csv_row[3:-1] if memory else csv_row[3:] + mem_index = (-1 if memory else 0) + (-3 if meta else 0) + runtimes = csv_row[3:mem_index] if memory or meta else csv_row[3:] if delta: runtimes = [int(x) if x else 0 for x in runtimes] runtimes = reduce(lambda l, x: l.append(l[-1] + x) or # runnin @@ -261,7 +263,7 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False): self.min, self.max, self.median, self.mean, self.sd = \ sams.min, sams.max, sams.median, sams.mean, sams.sd self.max_rss = ( # Maximum Resident Set Size (B) - int(csv_row[-1]) if memory else None) + int(csv_row[mem_index]) if memory else None) else: # Legacy format with statistics for normal distribution. self.min = int(csv_row[3]) # Minimum runtime (μs) self.max = int(csv_row[4]) # Maximum runtime (μs) @@ -271,6 +273,11 @@ def __init__(self, csv_row, quantiles=False, memory=False, delta=False): self.max_rss = ( # Maximum Resident Set Size (B) int(csv_row[8]) if len(csv_row) > 8 else None) self.samples = None + + # Optional measurement metadata. 
The number of: + # memory pages used, involuntary context switches and voluntary yields + self.mem_pages, self.involuntary_cs, self.yield_count = \ + [int(x) for x in csv_row[-3:]] if meta else (None, None, None) self.yields = None self.setup = None @@ -352,6 +359,7 @@ def __init__(self): """Create instance of `LogParser`.""" self.results = [] self.quantiles, self.delta, self.memory = False, False, False + self.meta = False self._reset() def _reset(self): @@ -371,12 +379,12 @@ def _append_result(self, result): columns = result.split(',') if ',' in result else result.split() r = PerformanceTestResult( columns, quantiles=self.quantiles, memory=self.memory, - delta=self.delta) + delta=self.delta, meta=self.meta) r.setup = self.setup r.max_rss = r.max_rss or self.max_rss - r.mem_pages = self.mem_pages + r.mem_pages = r.mem_pages or self.mem_pages r.voluntary_cs = self.voluntary_cs - r.involuntary_cs = self.involuntary_cs + r.involuntary_cs = r.involuntary_cs or self.involuntary_cs if self.samples: r.samples = PerformanceTestSamples(r.name, self.samples) r.samples.exclude_outliers() @@ -391,6 +399,7 @@ def _store_memory_stats(self, max_rss, mem_pages): def _configure_format(self, header): self.quantiles = 'MEAN' not in header self.memory = 'MAX_RSS' in header + self.meta = 'PAGES' in header self.delta = '𝚫' in header # Regular expression and action to take when it matches the parsed line diff --git a/benchmark/scripts/test_compare_perf_tests.py b/benchmark/scripts/test_compare_perf_tests.py index 3d27769851281..4c1c6effffcd5 100644 --- a/benchmark/scripts/test_compare_perf_tests.py +++ b/benchmark/scripts/test_compare_perf_tests.py @@ -308,6 +308,46 @@ def validatePTR(deq): # construct from delta encoded quantiles string 1,,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1""" map(validatePTR, delta_encoded_quantiles.split('\n')[1:]) + def test_init_meta(self): + # #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),… + # …PAGES,ICS,YIELD + log = '1,Ackermann,200,715,1281,726,47,715,7,29,15' + r = PerformanceTestResult(log.split(','), meta=True) + self.assertEqual((r.test_num, r.name), ('1', 'Ackermann')) + self.assertEqual( + (r.num_samples, r.min, r.max, r.mean, r.sd, r.median), + (200, 715, 1281, 726, 47, 715)) + self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), + (7, 29, 15)) + # #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),MAX_RSS(B),… + # …PAGES,ICS,YIELD + log = '1,Ackermann,200,715,1951,734,97,715,36864,9,50,15' + r = PerformanceTestResult(log.split(','), memory=True, meta=True) + self.assertEqual( + (r.num_samples, r.min, r.max, r.mean, r.sd, r.median), + (200, 715, 1951, 734, 97, 715)) + self.assertEqual( + (r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss), + (9, 50, 15, 36864)) + # #,TEST,SAMPLES,MIN(μs),MAX(μs),PAGES,ICS,YIELD + log = '1,Ackermann,200,715,3548,8,31,15' + r = PerformanceTestResult(log.split(','), quantiles=True, meta=True) + self.assertEqual((r.num_samples, r.min, r.max), (200, 715, 3548)) + self.assertEqual((r.samples.count, r.samples.min, r.samples.max), + (2, 715, 3548)) + self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), + (8, 31, 15)) + # #,TEST,SAMPLES,MIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD + log = '1,Ackermann,200,715,1259,32768,8,28,15' + r = PerformanceTestResult( + log.split(','), quantiles=True, memory=True, meta=True) + self.assertEqual((r.num_samples, r.min, r.max), (200, 715, 1259)) + self.assertEqual((r.samples.count, r.samples.min, r.samples.max), + (2, 715, 1259)) + self.assertEquals(r.max_rss, 32768) + 
self.assertEqual((r.mem_pages, r.involuntary_cs, r.yield_count), + (8, 28, 15)) + def test_repr(self): log_line = '1,AngryPhonebook,20,10664,12933,11035,576,10884' r = PerformanceTestResult(log_line.split(',')) @@ -517,6 +557,33 @@ def test_parse_delta_quantiles(self): # last 3 ventiles were outliers and were excluded from the sample (200, 214, 215, 18)) + def test_parse_meta(self): + r = LogParser.results_from_string( + '#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),' + + 'PAGES,ICS,YIELD\n' + + '0,B,1,2,2,2,0,2,7,29,15')['B'] + self.assertEqual( + (r.min, r.mem_pages, r.involuntary_cs, r.yield_count), + (2, 7, 29, 15)) + r = LogParser.results_from_string( + '#,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs),' + + 'MAX_RSS(B),PAGES,ICS,YIELD\n' + + '0,B,1,3,3,3,0,3,36864,9,50,15')['B'] + self.assertEqual( + (r.min, r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss), + (3, 9, 50, 15, 36864)) + r = LogParser.results_from_string( + '#,TEST,SAMPLES,MIN(μs),MAX(μs),PAGES,ICS,YIELD\n' + + '0,B,1,4,4,8,31,15')['B'] + self.assertEqual((r.min, r.mem_pages, r.involuntary_cs, r.yield_count), + (4, 8, 31, 15)) + r = LogParser.results_from_string( + '#,TEST,SAMPLES,MIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD\n' + + '0,B,1,5,5,32768,8,28,15')['B'] + self.assertEqual( + (r.min, r.mem_pages, r.involuntary_cs, r.yield_count, r.max_rss), + (5, 8, 28, 15, 32768)) + def test_parse_results_verbose(self): """Parse multiple performance test results with 2 sample formats: single line for N = 1; two lines for N > 1. diff --git a/benchmark/utils/ArgParse.swift b/benchmark/utils/ArgParse.swift index 81bd8502d15e7..41d23017cebf9 100644 --- a/benchmark/utils/ArgParse.swift +++ b/benchmark/utils/ArgParse.swift @@ -112,7 +112,8 @@ class ArgumentParser { .split(separator: "\n") .joined(separator: "\n" + padded("")) } - let positional = f("TEST", "name or number of the benchmark to measure") + let positional = f("TEST", "name or number of the benchmark to measure;\n" + + "use +/- prefix to filter by substring match") let optional = arguments.filter { $0.name != nil } .map { f($0.name!, $0.help ?? "") } .joined(separator: "\n") @@ -152,7 +153,6 @@ class ArgumentParser { /// We assume that optional switch args are of the form: /// /// --opt-name[=opt-value] - /// -opt-name[=opt-value] /// /// with `opt-name` and `opt-value` not containing any '=' signs. Any /// other option passed in is assumed to be a positional argument. @@ -165,7 +165,7 @@ class ArgumentParser { for arg in CommandLine.arguments[1..? var numSamples: UInt? + var minSamples: UInt? var numIters: UInt? var quantile: UInt? var delta: Bool? @@ -112,6 +127,7 @@ struct TestConfig { var sampleTime: Double? var verbose: Bool? var logMemory: Bool? + var logMeta: Bool? var action: TestAction? var tests: [String]? 
} @@ -136,6 +152,9 @@ struct TestConfig { "default: 1 or auto-scaled to measure for\n" + "`sample-time` if num-iters is also specified\n", parser: { UInt($0) }) + p.addArgument("--min-samples", \.minSamples, + help: "minimum number of samples to take per benchmark\n", + parser: { UInt($0) }) p.addArgument("--num-iters", \.numIters, help: "number of iterations averaged in the sample;\n" + "default: auto-scaled to measure for `sample-time`", @@ -154,6 +173,8 @@ struct TestConfig { help: "increase output verbosity") p.addArgument("--memory", \.logMemory, defaultValue: true, help: "log the change in maximum resident set size (MAX_RSS)") + p.addArgument("--meta", \.logMeta, defaultValue: true, + help: "log the metadata (memory usage, context switches)") p.addArgument("--delim", \.delim, help:"value delimiter used for log output; default: ,", parser: { $0 }) @@ -179,14 +200,16 @@ struct TestConfig { sampleTime = c.sampleTime ?? 1.0 numIters = c.numIters.map { Int($0) } numSamples = c.numSamples.map { Int($0) } + minSamples = c.minSamples.map { Int($0) } quantile = c.quantile.map { Int($0) } delta = c.delta ?? false verbose = c.verbose ?? false logMemory = c.logMemory ?? false + logMeta = c.logMeta ?? false afterRunSleep = c.afterRunSleep action = c.action ?? .run tests = TestConfig.filterTests(registeredBenchmarks, - specifiedTests: Set(c.tests ?? []), + tests: c.tests ?? [], tags: c.tags ?? [], skipTags: c.skipTags ?? [.unstable, .skip]) @@ -206,8 +229,10 @@ struct TestConfig { let configuration = """ --- CONFIG --- NumSamples: \(numSamples ?? 0) + MinSamples: \(minSamples ?? 0) Verbose: \(verbose) LogMemory: \(logMemory) + LogMeta: \(logMeta) SampleTime: \(sampleTime) NumIters: \(numIters ?? 0) Quantile: \(quantile ?? 0) @@ -232,10 +257,16 @@ struct TestConfig { /// specified filtering conditions. static func filterTests( _ registeredBenchmarks: [BenchmarkInfo], - specifiedTests: Set, + tests: [String], tags: Set, skipTags: Set ) -> [(index: String, info: BenchmarkInfo)] { + var t = tests + let filtersIndex = t.partition { $0.hasPrefix("+") || $0.hasPrefix("-") } + let excludesIndex = t[filtersIndex...].partition { $0.hasPrefix("-") } + let specifiedTests = Set(t[.. Bool { return specifiedTests.contains(b.name) || - specifiedTests.contains(indices[b.name]!) - } // !! "`allTests` have been assigned an index" + // !! "`allTests` have been assigned an index" + specifiedTests.contains(indices[b.name]!) || + (includes.contains { b.name.contains($0) } && + excludes.allSatisfy { !b.name.contains($0) } ) + } return allTests - .filter(specifiedTests.isEmpty ? byTags : byNamesOrIndices) + .filter(tests.isEmpty ? byTags : byNamesOrIndices) .map { (index: indices[$0.name]!, info: $0) } } } +extension String { + func contains(_ str: Substring) -> Bool { + guard let c = str.first else { return false } + var s = self[...] + repeat { + s = s[(s.firstIndex(of: c) ?? s.endIndex)...] + if s.starts(with: str) { return true } + s = s.dropFirst() + } while s.startIndex != s.endIndex + return false + } +} + struct Stats { var n: Int = 0 var S: Double = 0.0 @@ -340,6 +387,7 @@ final class TestRunner { var start, end, lastYield: Timer.TimeT let baseline = TestRunner.getResourceUtilization() let schedulerQuantum = UInt64(10_000_000) // nanoseconds (== 10ms, macos) + var yieldCount = 0 init(_ config: TestConfig) { self.c = config @@ -350,6 +398,7 @@ final class TestRunner { /// Offer to yield CPU to other processes and return current time on resume. 
func yield() -> Timer.TimeT { sched_yield() + yieldCount += 1 return timer.getTime() } @@ -383,7 +432,17 @@ final class TestRunner { var u = rusage(); getrusage(rusageSelf, &u); return u } - /// Returns maximum resident set size (MAX_RSS) delta in bytes. + static let pageSize: Int = { + #if canImport(Darwin) + let pageSize = _SC_PAGESIZE + #else + let pageSize = Int32(_SC_PAGESIZE) + #endif + return sysconf(pageSize) + }() + + /// Returns metadata about the measurement, such as memory usage and number + /// of context switches. /// /// This method of estimating memory usage is valid only for executing single /// benchmark. That's why we don't worry about reseting the `baseline` in @@ -391,30 +450,34 @@ final class TestRunner { /// /// FIXME: This current implementation doesn't work on Linux. It is disabled /// permanently to avoid linker errors. Feel free to fix. - func measureMemoryUsage() -> Int? { + func collectMetadata() -> MeasurementMetadata? { #if os(Linux) return nil #else - guard c.logMemory else { return nil } let current = TestRunner.getResourceUtilization() - let maxRSS = current.ru_maxrss - baseline.ru_maxrss -#if canImport(Darwin) - let pageSize = _SC_PAGESIZE -#else - let pageSize = Int32(_SC_PAGESIZE) -#endif - let pages = { maxRSS / sysconf(pageSize) } + func delta(_ stat: KeyPath) -> Int { + return current[keyPath: stat] - baseline[keyPath: stat] + } + let maxRSS = delta(\rusage.ru_maxrss) + let pages = maxRSS / TestRunner.pageSize func deltaEquation(_ stat: KeyPath) -> String { let b = baseline[keyPath: stat], c = current[keyPath: stat] return "\(c) - \(b) = \(c - b)" } logVerbose( """ - MAX_RSS \(deltaEquation(\rusage.ru_maxrss)) (\(pages()) pages) + MAX_RSS \(deltaEquation(\rusage.ru_maxrss)) (\(pages) pages) ICS \(deltaEquation(\rusage.ru_nivcsw)) VCS \(deltaEquation(\rusage.ru_nvcsw)) + yieldCount \(yieldCount) """) - return maxRSS + return MeasurementMetadata( + maxRSS: maxRSS, + pages: pages, + ics: delta(\rusage.ru_nivcsw), + vcs: delta(\rusage.ru_nvcsw), + yields: yieldCount + ) #endif } @@ -438,6 +501,7 @@ final class TestRunner { private func resetMeasurements() { let now = yield() (start, end, lastYield) = (now, now, now) + yieldCount = 0 } /// Time in nanoseconds spent running the last function @@ -516,8 +580,11 @@ final class TestRunner { Int.max / 10_000, // by the inner loop multiplier inside the `testFn`. c.numIters ?? calibrateMeasurements()) - let numSamples = c.numSamples ?? min(200, // Cap the number of samples - c.numIters == nil ? 1 : calibrateMeasurements()) + let numSamples = c.numSamples ?? + // Compute the number of samples to measure for `sample-time`, + // clamped in (`min-samples`, 200) range, if the `num-iters` are fixed. + max(c.minSamples ?? 1, min(200, c.numIters == nil ? 1 : + calibrateMeasurements())) samples.reserveCapacity(numSamples) logVerbose(" Collecting \(numSamples) samples.") @@ -532,7 +599,7 @@ final class TestRunner { samples = samples.map { $0 * lf } } - return BenchResults(samples, maxRSS: measureMemoryUsage()) + return BenchResults(samples, collectMetadata()) } var header: String { @@ -554,7 +621,8 @@ final class TestRunner { ["#", "TEST", "SAMPLES"] + (c.quantile.map(quantiles) ?? ["MIN", "MAX", "MEAN", "SD", "MEDIAN"].map(withUnit)) + - (c.logMemory ? ["MAX_RSS(B)"] : []) + (c.logMemory ? ["MAX_RSS(B)"] : []) + + (c.logMeta ? 
["PAGES", "ICS", "YIELD"] : [])
     ).joined(separator: c.delim)
   }
 
@@ -571,12 +639,14 @@ final class TestRunner {
             $0.encoded.append($1 - $0.last); $0.last = $1 }.encoded
           : qs
       }
-      return (
-        [r.sampleCount] +
+      let values: [Int] = [r.sampleCount] +
         (c.quantile.map(quantiles) ??
           [r.min, r.max, r.mean, r.sd, r.median]) +
-        [r.maxRSS].compactMap { $0 }
-      ).map { (c.delta && $0 == 0) ? "" : String($0) } // drop 0s in deltas
+        (c.logMemory ? [r.meta?.maxRSS].compactMap { $0 } : []) +
+        (c.logMeta ? r.meta.map {
+          [$0.pages, $0.ics, $0.yields] } ?? [] : [])
+      return values.map {
+        (c.delta && $0 == 0) ? "" : String($0) } // drop 0s in deltas
     }
     let benchmarkStats = (
       [index, t.name] + (results.map(values) ?? ["Unsupported"])
diff --git a/test/benchmark/Benchmark_O.test.md b/test/benchmark/Benchmark_O.test.md
index 20b2754aff257..65b0bbb83d123 100644
--- a/test/benchmark/Benchmark_O.test.md
+++ b/test/benchmark/Benchmark_O.test.md
@@ -100,18 +100,31 @@ ALPHASORT: FatCompactMap
 ````
 
+Substring filters using + and - prefix
+
+````
+RUN: %Benchmark_O --list -.A +Angry -Small AngryPhonebook.ASCII.Small \
+RUN: | %FileCheck %s --check-prefix FILTERS
+FILTERS: AngryPhonebook.ASCII.Small
+FILTERS-NOT: AngryPhonebook.Armenian
+FILTERS-NOT: AngryPhonebook.Cyrillic.Small
+FILTERS: AngryPhonebook.Cyrillic
+FILTERS: AngryPhonebook.Strasse
+````
+
 ## Running Benchmarks
 
-Each real benchmark execution takes about a second per sample. If possible,
-multiple checks are combined into one run to minimise the test time.
+By default, each real benchmark execution takes about a second per sample.
+To minimise the test time, multiple checks are combined into one run.
 
 ````
 RUN: %Benchmark_O AngryPhonebook --num-iters=1 \
+RUN: --sample-time=0.000001 --min-samples=7 \
 RUN: | %FileCheck %s --check-prefix NUMITERS1 \
 RUN: --check-prefix LOGHEADER \
 RUN: --check-prefix LOGBENCH
 LOGHEADER-LABEL: #,TEST,SAMPLES,MIN(μs),MAX(μs),MEAN(μs),SD(μs),MEDIAN(μs)
 LOGBENCH: {{[0-9]+}},
-NUMITERS1: AngryPhonebook,{{[0-9]+}}
+NUMITERS1: AngryPhonebook,7
 NUMITERS1-NOT: 0,0,0,0,0
 LOGBENCH-SAME: ,{{[0-9]+}},{{[0-9]+}},{{[0-9]+}},{{[0-9]+}},{{[0-9]+}}
 ````
@@ -132,6 +145,22 @@ VENTILES: V7(μs),V8(μs),V9(μs),VA(μs),VB(μs),VC(μs),VD(μs),VE(μs),VF(μs
 VENTILES: VH(μs),VI(μs),VJ(μs),MAX(μs)
 ````
 
+### Reporting Measurement Metadata
+The presence of the optional `--meta` argument controls logging of measurement
+metadata at the end of the benchmark summary.
+
+* PAGES – number of memory pages used
+* ICS – number of involuntary context switches
+* YIELD – number of voluntary yields
+
+````
+RUN: %Benchmark_O 0 --quantile=1 --meta | %FileCheck %s --check-prefix META
+META: #,TEST,SAMPLES,MIN(μs),MAX(μs),PAGES,ICS,YIELD
+RUN: %Benchmark_O 0 --quantile=1 --meta --memory \
+RUN: | %FileCheck %s --check-prefix MEMMETA
+MEMMETA: #,TEST,SAMPLES,MIN(μs),MAX(μs),MAX_RSS(B),PAGES,ICS,YIELD
+````
+
 ### Verbose Mode
 Reports detailed information during measurement, including configuration
 details, environmental statistics (memory used and number of context switches)
@@ -149,7 +178,8 @@ RUN: | %FileCheck %s --check-prefix RUNJUSTONCE \
 RUN: --check-prefix CONFIG \
 RUN: --check-prefix LOGVERBOSE \
 RUN: --check-prefix MEASUREENV \
-RUN: --check-prefix LOGFORMAT
+RUN: --check-prefix LOGFORMAT \
+RUN: --check-prefix YIELDCOUNT
 CONFIG: NumSamples: 2
 CONFIG: Tests Filter: ["1", "Ackermann", "1", "AngryPhonebook"]
 CONFIG: Tests to run: Ackermann, AngryPhonebook
@@ -162,6 +192,7 @@ LOGVERBOSE: Sample 1,{{[0-9]+}}
 MEASUREENV: MAX_RSS {{[0-9]+}} - {{[0-9]+}} = {{[0-9]+}} ({{[0-9]+}} pages)
 MEASUREENV: ICS {{[0-9]+}} - {{[0-9]+}} = {{[0-9]+}}
 MEASUREENV: VCS {{[0-9]+}} - {{[0-9]+}} = {{[0-9]+}}
+YIELDCOUNT: yieldCount 1
 RUNJUSTONCE-LABEL: 1,Ackermann
 RUNJUSTONCE-NOT: 1,Ackermann
 LOGFORMAT: ,{{[0-9]+}},{{[0-9]+}},,{{[0-9]*}},{{[0-9]+}}
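The `+`/`-` test filters introduced in `TestConfig.filterTests` and exercised by the FILTERS checks above can be summarised with a small standalone sketch. This is not the driver code itself: it ignores numeric test indices and tag-based selection, it uses Foundation's substring `contains` instead of the driver's own `String.contains(_: Substring)` extension, and `selectTests` is an illustrative name.

````swift
import Foundation  // for the substring-search String.contains(_:) used below

// Plain arguments select tests by exact name; "+foo" includes and "-bar"
// excludes tests by substring match, mirroring the scheme in filterTests.
func selectTests(_ allNames: [String], filters: [String]) -> [String] {
    let includes = filters.filter { $0.hasPrefix("+") }.map { String($0.dropFirst()) }
    let excludes = filters.filter { $0.hasPrefix("-") }.map { String($0.dropFirst()) }
    let exactNames = Set(filters.filter { !$0.hasPrefix("+") && !$0.hasPrefix("-") })
    return allNames.filter { name in
        exactNames.contains(name) ||
            (includes.contains(where: { name.contains($0) }) &&
             excludes.allSatisfy { !name.contains($0) })
    }
}

// Mirrors the FILTERS check added to Benchmark_O.test.md:
//   %Benchmark_O --list -.A +Angry -Small AngryPhonebook.ASCII.Small
let names = ["Ackermann", "AngryPhonebook.ASCII.Small", "AngryPhonebook.Armenian",
             "AngryPhonebook.Cyrillic.Small", "AngryPhonebook.Cyrillic",
             "AngryPhonebook.Strasse"]
print(selectTests(names, filters: ["-.A", "+Angry", "-Small", "AngryPhonebook.ASCII.Small"]))
// ["AngryPhonebook.ASCII.Small", "AngryPhonebook.Cyrillic", "AngryPhonebook.Strasse"]
````

Running the sketch prints the three names expected by the FILTERS/FILTERS-NOT lines: an exact name is accepted even when an exclude matches it, and excludes only veto candidates that were pulled in by a substring include.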
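The new PAGES/ICS/YIELD columns come from `collectMetadata()`, which logs deltas of `getrusage(2)` counters plus the driver's own yield counter. Below is a minimal, Darwin-only sketch of that idea (the upstream helper is disabled on Linux per the FIXME in the diff). `MetadataSketch`, `rusageSnapshot`, and `collectMetadataSketch` are illustrative names, not the driver's API; unlike the driver, which snapshots its baseline once when `TestRunner` is created, the sketch brackets a single work closure, and the YIELD column is a driver-internal count of `yield()` calls rather than an `rusage` field.

````swift
import Darwin

struct MetadataSketch {
    let pages: Int          // PAGES: memory pages used
    let involuntaryCS: Int  // ICS: involuntary context switches
    let voluntaryCS: Int    // VCS: voluntary context switches (verbose log only)
}

// Snapshot the process's resource usage counters.
func rusageSnapshot() -> rusage {
    var usage = rusage()
    getrusage(RUSAGE_SELF, &usage)
    return usage
}

// Diff the counters around the measured work; convert the MAX_RSS delta to
// pages using the system page size, as the driver does.
func collectMetadataSketch(around work: () -> Void) -> MetadataSketch {
    let pageSize = sysconf(_SC_PAGESIZE)
    let before = rusageSnapshot()
    work()
    let after = rusageSnapshot()
    return MetadataSketch(
        pages: (after.ru_maxrss - before.ru_maxrss) / pageSize,
        involuntaryCS: after.ru_nivcsw - before.ru_nivcsw,
        voluntaryCS: after.ru_nvcsw - before.ru_nvcsw)
}

let meta = collectMetadataSketch { _ = Array(repeating: 0, count: 1 << 20) }
print("PAGES \(meta.pages) ICS \(meta.involuntaryCS) VCS \(meta.voluntaryCS)")
````

As the comment in `collectMetadata` notes, a once-per-process baseline is only trustworthy when a single benchmark runs, which is why these columns are meant for single-test invocations like the META checks above.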