-
Notifications
You must be signed in to change notification settings - Fork 2
/
covid_toll_tool.py
executable file
·455 lines (356 loc) · 26.8 KB
/
covid_toll_tool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
#!/usr/bin/env python3
"""
Use OWID's data to create PNG charts and CSV datasets of all-cause mortality compared to COVID-19 mortality, for a given
country and year, in the context of vaccinations count, virus testing, restrictions stringency and the country's
all-cause mortality in preceding years.
"""
__author__ = "Maciej Sieczka <msieczka@sieczka.org>"
import argparse
import sys
import pandas as pd
import matplotlib.pyplot as mpyplot
import matplotlib.dates as mdates
import matplotlib.ticker as mticker
from datetime import date as ddate
from datetime import datetime as ddatetime
# Fail early on interpreters older than 3.9: the code below reads the `.week` attribute of
# `date.isocalendar()`'s result. The message goes to stderr (sys.exit() with a string prints it there) and the
# exit status is 1.
if sys.version_info < (3, 9):
    sys.exit("Python 3.9+ is required to run this script.")
def main(country, year, if_list_countries, if_interpolate):
    """
    Load both input CSV files and act on the command line options.

    Depending on the arguments this either lists the countries present in both datasets, processes every such
    country ('ALL'), or processes the single requested country. An unknown country name results in a message
    followed by the list of valid names.
    """
    # All-cause death count columns: the pre-2020 "background" years first, then the complete set.
    morta_death_cols_bgd = [f'deaths_{y}_all_ages' for y in range(2010, 2020)]
    morta_death_cols_all = morta_death_cols_bgd + [f'deaths_{y}_all_ages' for y in range(2020, 2023)]

    morta_cols = ['location', 'date', 'time', 'time_unit', *morta_death_cols_all]
    covid_cols = ['location', 'date', 'new_cases_smoothed', 'new_tests_smoothed', 'new_deaths', 'stringency_index',
                  'people_vaccinated', 'people_fully_vaccinated', 'total_boosters', 'population']

    # `usecols` restricts what is read; reindex() then arranges the columns in the order listed above.
    df_covid = pd.read_csv("./owid-covid-data.csv", parse_dates=['date'], usecols=covid_cols)
    df_covid = df_covid.reindex(columns=covid_cols)
    df_morta = pd.read_csv("./excess_mortality.csv", parse_dates=['date'], usecols=morta_cols)
    df_morta = df_morta.reindex(columns=morta_cols)

    common_countries = sorted(set(df_morta['location']).intersection(df_covid['location']))

    if if_list_countries:
        list_countries(common_countries)
        return

    if country == 'ALL':
        selected = common_countries
    elif country in common_countries:
        selected = [country]
    else:
        print(f"Country '{country}' is not present in both input datasets.\n")
        list_countries(common_countries)
        return

    for name in selected:
        orchestrate(name, df_covid, df_morta, year, morta_death_cols_bgd, morta_death_cols_all, if_interpolate)
def list_countries(common_countries):
    """Print how many countries are available in both input datasets, followed by their quoted names."""
    quoted_names = ', '.join(f"'{name}'" for name in common_countries)
    count = len(common_countries)
    print(f"Please set '--country' to one of the following {count} countries present in both input datasets, "
          f"or 'ALL', to process them all one by one: {quoted_names}.")
# Charts for the adjacent years (2020, 2021, 2022) overlap by one week, so that e.g. the last week of data on the 2020's
# chart is a copy of the 1st week on the 2021's chart. Effectively, there are 54 weeks of data on a chart of the
# 53-weeks long 2020, and 53 weeks of data on charts of 52 weeks-long 2021 and 2022.
#
# All-cause mortality weekly data series in OWID's excess_mortality.csv for 2010-2019 are all 52 weeks-long. To derive
# and draw 54 weeks of min, max and mean historical 2010-2019 mortality for a 2020 chart, I extend each such year's
# death count series with the following year's first 2 weeks - e.g. the 2010 series is extended with the first 2
# weeks of 2011, the 2011 series with the first 2 weeks of 2012 etc. For 2021 and 2022 charts (53 weeks-long), only
# one such week is appended. In case of 2015 (which has 53 weeks, but its death count data series is capped at week 52
# anyway in excess_mortality.csv) death count for the missing 53rd week is interpolated linearly from 2015's 52nd week
# and the 1st week of 2016.
def orchestrate(country, df_covid, df_morta, year, morta_death_cols_bgd, morta_death_cols_all, if_interpolate):
    """
    Prepare one country's data for one year and plot it.

    Narrows both input frames down to `country`, builds the weekly date index for `year`, runs
    process_morta_df(), process_covid_df() and merge_covid_morta_dfs(), derives a Y axis range from the
    country's all-time death counts, and finally calls plot_weekly().

    The country is skipped, with a note on stderr, when none of the `morta_death_cols_bgd` columns holds any
    data for it, or when its 'time_unit' column does not hold exactly one distinct non-null value. Returns None.
    """
    # Select only the data of a specific country.
    df_covid_country = df_covid[df_covid['location'] == country].copy().reset_index(drop=True)
    df_morta_country = df_morta[df_morta['location'] == country].copy().reset_index(drop=True)

    morta_death_cols_bgd_notnull = [c for c in morta_death_cols_bgd if df_morta_country[c].notnull().any()]

    # Without any background data there are no first/last background years to put in the chart legend.
    if not morta_death_cols_bgd_notnull:
        print("Skipping '{}': no all-cause mortality data in any of the background years.".format(country),
              file=sys.stderr)
        return

    morta_year_bgd_notnull_min = morta_death_cols_bgd_notnull[0].split('_')[1]
    morta_year_bgd_notnull_max = morta_death_cols_bgd_notnull[-1].split('_')[1]

    # Null values are dropped before picking the unit, so that a leading NaN is never mistaken for it.
    time_units = df_morta_country['time_unit'].dropna().unique()

    if len(time_units) != 1:
        print("Skipping '{}': expected exactly one mortality time unit, found {}.".format(
            country, sorted(str(u) for u in time_units)), file=sys.stderr)
        return

    time_unit = time_units[0]

    # Create ISO-week date index, starting at the end (7 = Sunday) of the 1st week of a year, and ending at the end
    # of the 1st week of the following year. So that there's a 1 week overlap between charts for subsequent years -
    # (eg. a 2020 chart will also have the 1st week of 2021). By ISO specification December 28th is always in the
    # last week of the year.
    last_iso_week = ddate(year=year, month=12, day=28).isocalendar().week
    dates_weekly_one = [ddatetime.fromisocalendar(year=year, week=w, day=7).strftime('%Y-%m-%d')
                        for w in range(1, last_iso_week + 1)]
    dates_weekly_one.append(ddatetime.fromisocalendar(year=year + 1, week=1, day=7).strftime('%Y-%m-%d'))

    df_dates_weekly_one = pd.DataFrame(dates_weekly_one, columns=['date'], dtype='datetime64[ns]')

    df_morta_country_all, df_morta_country_one = process_morta_df(df_morta_country, df_dates_weekly_one, time_unit,
                                                                  morta_death_cols_all, country)

    df_covid_country_all, df_covid_country_one = process_covid_df(df_covid_country, df_dates_weekly_one, time_unit,
                                                                  if_interpolate)

    df_merge_country_one = merge_covid_morta_dfs(df_covid_country_one, df_morta_country_one, year,
                                                 morta_death_cols_bgd)

    # Find the Y axis bottom and top value in all-time death counts for a given country; to have an identical Y axis
    # range on that country's charts in different years. For some countries the number of non-covid deaths in a
    # given year (e.g. Belgium in 2020) happens to be lower than the lowest number of deaths from all causes in
    # previous years. For some, it's higher than the highest number of deaths in previous years - probably due to
    # borked data, but anyway (e.g. Kyrgyzstan in 2020 - see https://github.com/owid/covid-19-data/issues/1550).
    deaths_all = df_morta_country_all['deaths']
    deaths_noncovid_all = df_morta_country_all.set_index('date')['deaths'].sub(
        df_covid_country_all.set_index('date')['new_deaths'])

    # This conditional is due to `deaths_noncovid_all` being all NaN under certain conditions. E.g. Greenland didn't
    # have any covid deaths until 2021-12-27, and its all-cause mortality ended in Sep 2021, as of
    # excess_mortality.csv at d4dfef79a8.
    if deaths_noncovid_all.isnull().all():
        y_min = deaths_all.min()
        y_max = deaths_all.max()
    else:
        y_min = min(deaths_noncovid_all.min(), deaths_all.min())
        y_max = max(deaths_noncovid_all.max(), deaths_all.max())

    plot_weekly(df_merge_country_one, country, year, morta_year_bgd_notnull_min, morta_year_bgd_notnull_max,
                time_unit, y_min, y_max)
def process_morta_df(df_morta_country, df_dates_weekly_one, time_unit, morta_death_cols_all, country):
    """
    Turn one country's per-year death count columns into weekly series.

    Parameters:
        df_morta_country: the country's mortality rows, holding every column named in `morta_death_cols_all`.
        df_dates_weekly_one: frame with a 'date' column - the weekly dates of the year being charted.
        time_unit: 'weekly' or 'monthly'; monthly counts are up-sampled to weekly and interpolated.
        morta_death_cols_all: 'deaths_<year>_all_ages' column names; the first and the last one define the
            range of years processed.
        country: value written to the 'location' column of the second returned frame.

    Returns:
        (df_morta_country_all, df_morta_country_one). The first has a 'date' and a 'deaths' column - one
        continuous weekly series across all the years. The second is a copy of `df_dates_weekly_one` with
        'location', 'time_unit', 'time' (ISO week number) and one 'deaths_<year>_all_ages' column per year,
        each as long as `df_dates_weekly_one`.

    Raises:
        ValueError: if `time_unit` is neither 'weekly' nor 'monthly'.
    """
    if time_unit not in ('monthly', 'weekly'):
        raise ValueError("Unsupported mortality time unit: {!r}.".format(time_unit))

    morta_year_all_min = int(morta_death_cols_all[0].split('_')[1])
    morta_year_all_max = int(morta_death_cols_all[-1].split('_')[1])

    # From morta_year_all_min to morta_year_all_max. So that there is data overlap at year boundaries (e.g. for
    # 2015 52 -> 53 weeks interpolation).
    # NOTE: pd.date_range(start=str(morta_year_all_min), end=str(morta_year_all_max+2), freq='W') would be
    # simpler, but we need to start at 1st ISO week, while e.g. pd.date_range(start='2010', end='2021', freq='W')
    # returns '2010-01-03' as the 1st week of 2010, whereas per ISO-week convention (see e.g.
    # pd.date_range(start='2010', end='2011', freq='W')[0].isocalendar()) it's actually the 53rd week of 2009.
    dates_weekly_all = []

    for y in range(morta_year_all_min, morta_year_all_max + 1):
        for w in range(1, ddate(year=y, month=12, day=28).isocalendar().week + 1):
            dates_weekly_all.append(ddatetime.fromisocalendar(year=y, week=w, day=7).strftime('%Y-%m-%d'))

    # Append 1st 4 weeks of the following year, to make sure dates_weekly_all is long enough for
    # df_dates_weekly_one_weeks_count later on.
    for w in range(1, 5):
        dates_weekly_all.append(
            ddatetime.fromisocalendar(year=morta_year_all_max + 1, week=w, day=7).strftime('%Y-%m-%d'))

    df_dates_weekly_all = pd.DataFrame(dates_weekly_all, columns=['date'], dtype='datetime64[ns]')

    if time_unit == 'monthly':
        # From morta_year_all_min to morta_year_all_max. So that there is data overlap at year boundaries for
        # monthly -> weekly interpolation. The month-end frequency is given as an offset object rather than the
        # 'M' alias, which newer pandas releases deprecate in favour of 'ME'.
        dates_monthly_all = pd.date_range(start=str(morta_year_all_min), end=str(morta_year_all_max + 1),
                                          freq=pd.offsets.MonthEnd())

        df_morta_country_all_monthly = pd.DataFrame(dates_monthly_all, columns=['date'], dtype='datetime64[ns]')

        # Merge all morta_death_cols_all into one.
        df_morta_country_all_monthly['deaths'] = pd.concat(
            [df_morta_country[c][0:12] for c in morta_death_cols_all],
            axis='rows', ignore_index=True)

        # Up-sample and interpolate monthly mortality data to weekly so that it can be used with other weekly data.
        df_morta_country_all_monthly = df_morta_country_all_monthly.set_index('date').resample(rule='W').first(). \
            interpolate(limit_area='inside').reset_index()

        # Align the up-sampled monthly->weekly all-cause mortality data with the weekly date index which fully
        # encompasses morta_year_all_min up to morta_year_all_max.
        df_morta_country_all = pd.merge(left=df_dates_weekly_all, right=df_morta_country_all_monthly, on='date',
                                        how='left')

    else:
        # Merge all morta_death_cols_all columns into one.
        # NOTE: Eg. pd.concat([df_morta_country[c].dropna() for c in df_morta_country[morta_death_cols_all]],
        # axis='rows', ignore_index=True) would be simpler, but column 'deaths_2015_all_ages' which should have 53
        # records has only 52, so we have to take NaN as ['deaths_2015_all_ages'][52] and interpolate it from its
        # neighbours.
        deaths = []

        for y in range(morta_year_all_min, morta_year_all_max + 1):
            w = ddate(year=y, month=12, day=28).isocalendar().week
            deaths.extend(df_morta_country['deaths_{}_all_ages'.format(y)].iloc[0:w].to_list())

        df_morta_country_all = pd.concat([df_dates_weekly_all, pd.DataFrame(deaths, columns=['deaths'])],
                                         axis='columns')

        # Interpolate NaNs from nearest neighbours. One such record for sure is 2016-01-03 in all countries' data
        # (53rd week of 2015), but maybe some countries have more. So interpolating it all away, just in case.
        # The result is assigned back instead of calling interpolate(inplace=True) on the selected column: the
        # latter acts on a temporary object when pandas' Copy-on-Write is active and would leave the frame as is.
        df_morta_country_all['deaths'] = df_morta_country_all['deaths'].interpolate(limit_area='inside')

    # Put df_morta_country back together the way we need it for further processing.
    df_morta_country_one = df_dates_weekly_one.copy()
    df_morta_country_one['location'] = country
    df_morta_country_one['time_unit'] = time_unit
    df_morta_country_one['time'] = df_morta_country_one['date'].dt.isocalendar().week

    df_dates_weekly_one_weeks_count = len(df_dates_weekly_one)

    for y in range(morta_year_all_min, morta_year_all_max + 1):
        col = 'deaths_{}_all_ages'.format(y)

        # As many consecutive Sundays as the charted year has, counted from the end of year y's 1st ISO week.
        date_start = ddatetime.fromisocalendar(year=y, week=1, day=7).strftime('%Y-%m-%d')
        date_range = pd.date_range(start=date_start, periods=df_dates_weekly_one_weeks_count, freq='W')

        df_morta_country_one[col] = df_morta_country_all.loc[
            df_morta_country_all['date'].isin(date_range), 'deaths'].to_list()

    return df_morta_country_all, df_morta_country_one
def process_covid_df(df_covid_country, df_dates_weekly_one, time_unit, if_interpolate):
    """
    Resample one country's daily covid data to weekly records dated on Sundays.

    Parameters:
        df_covid_country: the country's daily covid rows. It is modified in place: the percentage columns are
            added and, if `if_interpolate` is set, gaps in the source columns are filled.
        df_dates_weekly_one: frame with a 'date' column - the weekly dates of the year being charted.
        time_unit: time unit of the all-cause mortality data; for 'monthly', the weekly 'new_deaths' are
            replaced with monthly sums up-sampled to weekly.
        if_interpolate: whether to fill NaN gaps by interpolation, both before and after the weekly resampling.

    Returns:
        (df_covid_country_all, df_covid_country_one) - the weekly data for all dates, and the same data
        left-joined onto the dates of `df_dates_weekly_one`.
    """
    def sum_or_nan(values):
        # Unlike a plain sum(), a period holding nothing but NaN yields NaN instead of 0.
        return values.sum(min_count=1)

    daily_cols = ['people_vaccinated', 'people_fully_vaccinated', 'total_boosters', 'stringency_index',
                  'new_cases_smoothed', 'new_tests_smoothed', 'new_deaths']

    weekly_cols = ['people_vaccinated', 'people_fully_vaccinated', 'total_boosters', 'people_vaccinated_percent',
                   'people_fully_vaccinated_percent', 'total_boosters_percent', 'stringency_index',
                   'new_cases_smoothed', 'new_tests_smoothed', 'positive_test_percent', 'new_deaths']

    if if_interpolate:
        # Fill any NaN values with interpolation between the 2 known closest values. Zeros are treated as real data
        # and left intact. Eg. vaccination counts and stringency index data are notoriously missing, Mexico and
        # Ecuador had single missing records of 'new_deaths' at d2e597487d etc.
        # The result is assigned back instead of calling interpolate(inplace=True) on the selected column: the
        # latter acts on a temporary object when pandas' Copy-on-Write is active and would leave the frame as is.
        for col in daily_cols:
            df_covid_country[col] = df_covid_country[col].interpolate(limit_area='inside')

    # NOTE: OWID's positive_rate multiplied by 100 usually equals my positive_test_percent. However, there are
    # countries for which OWID derive positive_rate in a different way than "JHU cases divided by OWID tests". As of
    # writing, this applies to 17 of those 110 countries my script covers at present. For more information see OWID's
    # team replies in https://github.com/owid/covid-19-data/issues/2333.
    # TODO: Decide whether to use OWID's `positive_rate * 100`, or to stick with `new_cases_smoothed /
    #  new_tests_smoothed * 100`. For now I'll go with the latter, as it allows me to easily spot countries whose cases
    #  or tests count are weird - like Brazil.
    df_covid_country['positive_test_percent'] = \
        df_covid_country['new_cases_smoothed'] / df_covid_country['new_tests_smoothed'] * 100

    for col in ('people_vaccinated', 'people_fully_vaccinated', 'total_boosters'):
        df_covid_country[col + '_percent'] = df_covid_country[col] / df_covid_country['population'] * 100

    # Resample the daily covid data to match the weekly mortality data, with week date on Sunday. Only the columns
    # named below end up in the result, so `location` is dropped, but we don't need it. Resampling also "hides"
    # the `date` column by setting an index on it, but we are going to need this column later on, thus bringing it
    # back with reset_index().
    df_covid_country_all = df_covid_country.resample(rule='W', on='date').agg(
        {'new_deaths': sum_or_nan,
         'new_cases_smoothed': sum_or_nan,
         'new_tests_smoothed': sum_or_nan,
         'positive_test_percent': 'mean',
         'stringency_index': 'mean',
         'people_vaccinated': 'mean',
         'people_fully_vaccinated': 'mean',
         'total_boosters': 'mean',
         'people_vaccinated_percent': 'mean',
         'people_fully_vaccinated_percent': 'mean',
         'total_boosters_percent': 'mean',
         'population': 'mean'}
    ).reset_index()

    if if_interpolate:
        # Interpolate again - now between the weekly values. Due to possible (although very rare) time interval
        # irregularities in the OWID's data, which may cause weekly mean of such non-daily records to be NaN. Eg.
        # Portugal used to have a couple bi-weekly records of 'stringency_index' (see
        # https://github.com/owid/covid-19-data/issues/2258). There was a similar problem with Estonia, Greece and
        # Latvia at that time. I haven't actually observed such issues with data series other than 'stringency
        # index', but let's interpolate them away as well, just in case. This won't do harm - if they don't have NaN
        # records, interpolation will just leave them intact.
        for col in weekly_cols:
            df_covid_country_all[col] = df_covid_country_all[col].interpolate(limit_area='inside')

    # If all-cause mortality data resolution is monthly, we need to adjust daily covid mortality data accordingly.
    if time_unit == 'monthly':
        # The month-end rule is given as an offset object rather than the 'M' alias, which newer pandas releases
        # deprecate in favour of 'ME'.
        deaths_monthly_as_weekly = df_covid_country.resample(rule=pd.offsets.MonthEnd(), on='date'). \
            agg({'new_deaths': sum_or_nan}). \
            resample(rule='W').first(). \
            interpolate(limit_area='inside').reset_index()

        # Align the up-sampled daily->monthly->weekly covid mortality data with the df_covid_country_all's date
        # index, and replace 'new_deaths' there with daily->monthly->weekly data.
        df_covid_country_all['new_deaths'] = pd.merge(
            left=df_covid_country_all[['date']], right=deaths_monthly_as_weekly, on='date',
            how='left')['new_deaths']

    # Take only rows of the year specified on command line.
    df_covid_country_one = pd.merge(left=df_dates_weekly_one, right=df_covid_country_all, on='date', how='left')

    return df_covid_country_all, df_covid_country_one
def merge_covid_morta_dfs(df_covid_country_one, df_morta_country, year, morta_death_cols_bgd):
    """
    Join the weekly mortality and covid frames and add the derived death count columns.

    Added columns: 'deaths_min', 'deaths_max' and 'deaths_mean' - row-wise statistics over the
    `morta_death_cols_bgd` columns - and 'deaths_noncovid', i.e. the all-cause deaths of `year` minus
    'new_deaths'. The result is returned rounded to 3 decimal places.
    """
    # Merge both datasets now that they are complete and aligned on same dates.
    merged = pd.merge(df_morta_country, df_covid_country_one, how='inner')

    background = merged[morta_death_cols_bgd]
    merged['deaths_min'] = background.min(axis='columns')
    merged['deaths_max'] = background.max(axis='columns')
    merged['deaths_mean'] = background.mean(axis='columns')

    # NOTE: At certain dates, for some countries, one-off upstream corrections in covid mortality counts sometimes
    # happen, leading to over- or under-shoots in deaths_noncovid - https://github.com/owid/covid-19-data/issues/1550.
    deaths_this_year = merged[f'deaths_{year}_all_ages']
    merged['deaths_noncovid'] = deaths_this_year - merged['new_deaths']

    return merged.round(3)
# TODO: Watch out for the status of 'x_compat'. It's not documented where I'd expect it to be [1], although it's
# mentioned a few times in [2]. If it's going to be deprecated, a workaround will be needed as e.g. per [3], [4].
# [1]https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.plot.html
# [2]https://pandas.pydata.org/pandas-docs/stable/user_guide/visualization.html
# [3]https://stackoverflow.com/questions/12945971/pandas-timeseries-plot-setting-x-axis-major-and-minor-ticks-and-labels
# [4]https://stackoverflow.com/questions/30133280/pandas-bar-plot-changes-date-format
def plot_weekly(df_merge_country_one, country, year, morta_year_bgd_notnull_min, morta_year_bgd_notnull_max, time_unit,
                y_min, y_max):
    """
    Draw one country's chart for one year and write it, together with its data, to the current directory.

    Death counts go on the left Y axis, percentages (stringency, positive tests, vaccinations) on the right
    one. The outputs are '<country>_<year>.png' and '<country>_<year>.csv', with spaces in the country name
    replaced by underscores.

    Parameters:
        df_merge_country_one: merged weekly data holding the 'date' column and every column plotted below.
        country, year: used in the chart title and the output file names; `year` also selects the
            'deaths_<year>_all_ages' column.
        morta_year_bgd_notnull_min, morta_year_bgd_notnull_max: first and last background year, shown in the
            legend.
        time_unit: mortality data time unit, shown in the legend.
        y_min, y_max: range of the left Y axis before a 5% margin is added on both ends.
    """
    bgd_years = '{}-{}'.format(morta_year_bgd_notnull_min, morta_year_bgd_notnull_max)
    out_basename = '{}_{}'.format(country.replace(' ', '_'), year)
    dates = df_merge_country_one['date'].to_list()
    x_limits = [dates[0], dates[-1]]
    y_margin = (abs(y_max) - abs(y_min)) * 0.05

    fig, axs = mpyplot.subplots(figsize=(13.55, 5.75))  # Create an empty matplotlib figure and axes.

    # pyplot keeps every figure it created until told otherwise, so the figure is closed once done with -
    # whatever the outcome. Otherwise figures would pile up when many countries are processed in one run.
    try:
        axs2 = axs.twinx()

        df_merge_country_one.plot(x_compat=True, kind='line', use_index=True, grid=True, rot=50,
                                  color=['deepskyblue', 'dimgrey', 'tab:red', 'black', 'black'],
                                  style=[':', ':', ':', '-', '--'],
                                  ax=axs, x='date', y=['deaths_min', 'deaths_mean', 'deaths_max',
                                                       'deaths_{}_all_ages'.format(str(year)), 'deaths_noncovid'])

        df_merge_country_one.plot(x_compat=True, kind='line', use_index=True, grid=False, rot=50,
                                  color=['fuchsia', 'cornflowerblue', 'mediumspringgreen', 'mediumspringgreen',
                                         'mediumspringgreen'],
                                  style=['-', '-', '--', '-', '-.'],
                                  ax=axs2, x='date', y=['stringency_index', 'positive_test_percent',
                                                        'people_vaccinated_percent',
                                                        'people_fully_vaccinated_percent',
                                                        'total_boosters_percent'])

        axs.fill_between(df_merge_country_one['date'], df_merge_country_one['deaths_min'],
                         df_merge_country_one['deaths_max'], alpha=0.25, color='silver')

        # The labels follow the order of the lines plotted on `axs` above; the last one describes the filled area.
        axs.legend(['{} lowest death count in {} from all causes'.format(time_unit, bgd_years),
                    '{} average death count in {} from all causes'.format(time_unit, bgd_years),
                    '{} highest death count in {} from all causes'.format(time_unit, bgd_years),
                    '{} death count in {} from all causes'.format(time_unit, year),
                    '{} death count in {} from all causes EXCLUDING deaths attributed to COVID-19'.format(
                        time_unit, year),
                    'range between highest and lowest {} death count from all causes in {}'.format(
                        time_unit, bgd_years)],
                   title='left Y axis:', fontsize='small', handlelength=1.6, loc='upper left',
                   bbox_to_anchor=(-0.0845, 1.3752))

        axs2.legend(['restrictions stringency: 0 ~ none, 100 ~ full lockdown',
                     'percent of positive results, aka "cases", in all COVID-19 tests conducted that week',
                     "percent of the country's populace who received at least 1 vaccine dose",
                     "percent of the country's populace who received all doses according to vaccination protocol",
                     "total booster doses administered, counted as the country's populace percentage"],
                    title='right Y axis:', fontsize='small', handlelength=1.6, loc='upper right',
                    bbox_to_anchor=(1.057, 1.375))

        axs.xaxis.set_major_locator(mdates.WeekdayLocator(interval=1, byweekday=6))
        axs.set_xlabel(xlabel="date (ISO week Sunday)", loc="right")

        axs2.set(ylabel="percent", xlim=x_limits, ylim=[-0.25, 100.5])
        axs2.yaxis.set_major_locator(mticker.MultipleLocator(10))

        axs.set(ylabel="count", xlim=x_limits, ylim=[y_min - y_margin, y_max + y_margin])
        axs2.set_xlabel(xlabel="date (ISO week Sunday)", loc="right")

        # Put the axs2 (the right Y axis) below the legend boxes. By default it would overlap the axs'es (left)
        # legend box. See https://github.com/matplotlib/matplotlib/issues/3706.
        legend = axs.get_legend()
        legend.remove()
        axs2.add_artist(legend)

        axs.xaxis.set_major_formatter(mdates.DateFormatter('%d.%m'))

        mpyplot.title("{}, {}".format(country, year), fontweight="bold", loc='right')

        mpyplot.figtext(0.065, 0,
                        "This chart was downloaded from https://github.com/czka/covid_toll_tool.\n"
                        "Chart's data source is OWID (Our World in Data), https://github.com/owid/covid-19-data.\n"
                        "For more information about the data presented on this chart please see "
                        "https://github.com/czka/covid_toll_tool/blob/main/README.md.",
                        fontsize=9, va="bottom", ha="left", linespacing=1.5, fontstyle='italic')

        fig.savefig('{}.png'.format(out_basename), bbox_inches="tight", pad_inches=0.05,
                    pil_kwargs={'optimize': True})
    finally:
        mpyplot.close(fig)

    df_merge_country_one.to_csv('{}.csv'.format(out_basename), index=False)
def parse_cli_args(argv=None):
    """
    Parse the command line and return the resulting argparse namespace.

    `argv` is the list of arguments to parse; None makes argparse read sys.argv. Exactly one of
    '--list_countries' and '--country' has to be given, and '--country' requires '--year'. On invalid
    input argparse prints the usage to stderr and exits with status 2.
    """
    parser = argparse.ArgumentParser(
        add_help=False,
        description=__doc__,
        epilog="The output are a PNG chart and CSV dataset for the '--country' and the '--year' specified on the "
               "command line - e.g. 'Poland_2020.png' and 'Poland_2020.csv'.")

    parser._optionals.title = 'Arguments'

    mutually_exclusive = parser.add_mutually_exclusive_group(required=True)

    mutually_exclusive.add_argument('--list_countries',
                                    action='store_true',
                                    dest='if_list_countries',
                                    help='List countries available in both input CSV files.')

    mutually_exclusive.add_argument('--country',
                                    help="Country to process - e.g. 'Poland'. Use 'ALL' to process all countries one by"
                                         " one.")

    parser.add_argument('--year',
                        type=int,
                        help="Year to process - e.g. '2020'.")

    parser.add_argument('--interpolate',
                        action='store_true',
                        dest='if_interpolate',
                        default=False,
                        help='Interpolate data gaps present in the columns the script reads from the input '
                             'owid-covid-data.csv, linearly from the missing data\'s nearest neighbours. For the sake '
                             'of a more complete chart, but at a cost of a less accurate representation of some of the '
                             'input data. By default interpolation is disabled.')

    parser.add_argument('--help', '-h',
                        action='help',
                        help='Show this help message.')

    args = parser.parse_args(argv)

    # '--year' is mandatory together with '--country'. This is checked on the parsed values rather than by
    # searching sys.argv for the literal '--country', which misses the '--country=Poland' spelling and
    # abbreviated option names.
    if args.country is not None and args.year is None:
        parser.error("the following arguments are required: --year")

    return args


if __name__ == '__main__':
    args = parse_cli_args()

    main(args.country, args.year, args.if_list_countries, args.if_interpolate)