Skip to content

Commit

Permalink
Trimmed samples no longer affect statistics
Browse files Browse the repository at this point in the history
Calculation of statistics now omits the first
TRIM_SAMPLES samples

Signed-off-by: Michael Maurer <maurer.mi@northeastern.edu>
  • Loading branch information
maurermi authored and HalosGhost committed Aug 8, 2024
1 parent 0a78595 commit 6e25a52
Showing 1 changed file with 25 additions and 20 deletions.
45 changes: 25 additions & 20 deletions coordinator/scripts/calculate_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,10 @@ def process_lats(lats):
df['latsS'] = df.lats / 10**3
df['pDate'] = df.time.values.astype('datetime64[ns]')
df = df[df.time > 1609459200000] # Filter out (corrupt) times before 2021
# Trim warm-up data: keep only rows whose time is past a cutoff derived from TRIM_SAMPLES
if 'TRIM_SAMPLES' in environ:
trim_samples = int(environ['TRIM_SAMPLES'])
df = df[df.time > df[0][0] + trim_samples*(block_time_ms / 1000)*one_sec]
dat = df.groupby(by=MyBinnerTime(expression=df.pDate, resolution='s', df=df, label='pDate'), agg={'count': 'count', 'lats': vaex.agg.list('lats')})
dat['lats'] = dat['lats'].apply(process_lats)

Expand Down Expand Up @@ -236,7 +240,7 @@ def process_lats(lats):
lat_99999.append(tps_its[2][1][idx][2])
idx += 1
current += datetime.timedelta(seconds=1)

dat = df.groupby(df.latsS, agg='count')
lats = dat.values
lat_max = df.max(df.latsS)
Expand All @@ -262,11 +266,11 @@ def process_lats(lats):
lat_lines.append({"lats":lat_99999, "title":"99.999%", "freq": 1})

periods = []

idx = 0
tps_target_files = [join('outputs',x) for x in listdir('outputs') \
if 'tps_target_' in x and 'hdf5' not in x]
if len(tps_target_files) > 0:
if len(tps_target_files) > 0:
t_index = pandas.date_range(start=begin- datetime.timedelta(seconds=5), end=end, freq='1s')
exports = 0
for f in tps_target_files:
Expand All @@ -285,7 +289,7 @@ def process_lats(lats):
exports = exports + 1
else:
print('{} has no rows', f)

if exports > 0:
df2 = vaex.open('outputs/*-tps_target_*.txt.hdf5')
df2['pDate'] = df2['index']
Expand All @@ -296,11 +300,11 @@ def process_lats(lats):
dat3.drop('tps_target', inplace=True)
dat3.drop('pDate', inplace=True)
dat2.join(dat3, inplace=True)

its = dat2.to_items()
tps_target = make_tps_target_series_line(its, begin, end, (lambda its,idx: its[0][1][idx].astype(datetime.datetime)), (lambda its,idx: its[1][1][idx]))
periods = extract_tps_target_periods(its, begin, end, (lambda its,idx: its[0][1][idx].astype(datetime.datetime)), (lambda its,idx: its[1][1][idx]), (lambda its,idx: its[3][1][idx]))

tps_lines.append({"tps":tps_target, "title":"Loadgen target", "freq": 1, "ma": False})

prev_lat99 = 0
Expand All @@ -326,6 +330,13 @@ def process_lats(lats):
elbow_latmean.append(prev_latmean)
elbow_lat99.append(prev_lat99)
elbow_lat99999.append(prev_lat99999)
elif 'TRIM_SAMPLES' in environ :
## Lop off (configurable) more "warm up" samples
trim_samples = int(environ['TRIM_SAMPLES'])
for i in range(len(tps_lines)):
tps_lines[i]["tps"] = tps_lines[i]["tps"][trim_samples:]
for i in range(len(lat_lines)):
lat_lines[i]["lats"] = lat_lines[i]["lats"][trim_samples:]

if archiver_based:
for output_file in output_files:
Expand Down Expand Up @@ -356,13 +367,7 @@ def process_lats(lats):
while len(lat_lines[i]["lats"]) > 0 and int(lat_lines[i]["lats"][-1]) == 0:
lat_lines[i]["lats"].pop()

## Lop off (configurable) more "warm up" samples
if 'TRIM_SAMPLES' in environ:
trim_samples = int(environ['TRIM_SAMPLES'])
for i in range(len(tps_lines)):
tps_lines[i]["tps"] = tps_lines[i]["tps"][trim_samples:]
for i in range(len(lat_lines)):
lat_lines[i]["lats"] = lat_lines[i]["lats"][trim_samples:]



## Create throughput histogram
Expand Down Expand Up @@ -480,7 +485,7 @@ def dev_to_val(dev):
if len(colors) > j:
color = colors[j]
ax.plot(tps_time, tps_ma, label='{} ({}ms MA)'.format(tps_line["title"],tps_ma_ms), color=color)



max = max * 1.02
Expand Down Expand Up @@ -572,7 +577,7 @@ def dev_to_val(dev):
if len(markers) > i:
marker = markers[i]
ax.plot(elbow_tps, yy, label=titles[i], color=color, marker=marker)

max = max * 1.02

ax.set_ylabel('Latency (ms)')
Expand All @@ -584,13 +589,13 @@ def dev_to_val(dev):
# TODO: Find a proper way of detecting the peak TPS range. None of the
# approaches below works accurately.
# for yy in y:
# delta_ma_tmp = []
# delta_ma_tmp = []
# pf_x = x
# pf_y = yy
# while math.isnan(pf_y[-1]):
# pf_y = pf_y[:-1]
# pf_x = pf_x[:-1]

# while math.isnan(pf_y[1]):
# pf_y = pf_y[1:]
# pf_x = pf_x[1:]
Expand Down Expand Up @@ -625,16 +630,16 @@ def dev_to_val(dev):
# peak_lb_idx = 0
# if peak_ub_idx < 0:
# peak_ub_idx = 0

# peak_lb = pf_x[peak_lb_idx]
# peak_ub = pf_x[peak_ub_idx]
# peak_found = True
# if delta_ma_above < 8:
# peak_found = False

# if peak_found:
# break

# if peak_ub > 0:
# ax.set_title('Latency/Throughput Elbow\nDetected peak {}-{} TX/s'.format(peak_lb, peak_ub))

Expand Down

0 comments on commit 6e25a52

Please sign in to comment.