Skip to content

Commit

Permalink
who needs pandas anyway?
Browse files Browse the repository at this point in the history
  • Loading branch information
andre-merzky committed Jun 18, 2024
1 parent 24dd105 commit fd261c9
Showing 1 changed file with 24 additions and 14 deletions.
38 changes: 24 additions & 14 deletions src/radical/analytics/utils/plot.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@

import os
import sys
import glob
import functools
import pandas as pd
import matplotlib as mpl

import pandas as pd
import numpy as np
import matplotlib as mpl
import radical.utils as ru


Expand Down Expand Up @@ -74,7 +77,7 @@ def get_mplstyle(name):
# ------------------------------------------------------------------------------
#
def stack_transitions(series, tresource, to_stack):
'''Creates data frames for each metric and combines them into one data frame
'''Creates time series for each metric and combines them into one data frame
for alignment. Since transitions obviously happen at arbitrary times, the
timestamps for metric A may see no transitions for metric B. When using a
combined timeline, we end up with NaN entries for some metrics on most
Expand Down Expand Up @@ -103,19 +106,26 @@ def stack_transitions(series, tresource, to_stack):
that point in time.
'''

dfs = [pd.DataFrame(series[tresource][m], columns=['time', m])
for m in series[tresource]]
# find the global time line
glob_times = set()
for m,df in series[tresource].items():
for t,_ in df:
glob_times.add(t)

glob_times = sorted(glob_times)

# create a timeline for each metric, set missing values as NaN
tlines = dict()
tlines['time'] = glob_times
for m in series[tresource]:
tlines[m] = [np.nan] * len(glob_times)

# merge them into one data frame, creating a common time-line
merged = functools.reduce(lambda left, right:
pd.merge(left, right,
left_on='time',
right_on='time',
how='outer'), dfs)
# sort the global time line
merged.sort_values(by='time', inplace=True)
for t,v in series[tresource][m]:
t_idx = glob_times.index(t)
tlines[m][t_idx] = v

# fill in missing values (carry over previous ones)
# create dataframe and fill all NaN values with the previous valid value
merged = pd.DataFrame(tlines)
merged.fillna(method='ffill', inplace=True)

# stacked plotting and area filling don't play well together in matplotlib.
Expand Down

0 comments on commit fd261c9

Please sign in to comment.