# Libraries
import datetime
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from dateutil.parser import parse
from pandas import Series
# Load the death-record CSV ("actas de defuncion", per the file name).
# FECHA is parsed as a date and used as the row index.
covid19 = pd.read_csv(
    "actas-defuncion-covid-19-cdmx1.csv",
    index_col="FECHA",
    parse_dates=["FECHA"],
)
covid19.head()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
EDAD | SEMANA | MES | RAZON | ACTA | |
---|---|---|---|---|---|
FECHA | |||||
2020-03-18 | 41 AÑOS | 12 | 3 | CHOQUE SEPTICO, NEUMONIA POR COVID 19 POR SARS... | 7129 |
2020-03-23 | 61 AÑOS | 13 | 3 | INSUFICIENCIA RESPIRATORIA AGUDA, NEUMONIA VIR... | 4459 |
2020-03-26 | 60 AÑOS | 13 | 3 | SINDROME DE INSUFICIENCIA RESPIRATORIA AGUDA, ... | 4591 |
2020-03-26 | 37 AÑOS | 13 | 3 | NEUMONIA POR CORONAVIRUS | 7879 |
2020-03-26 | 63 AÑOS | 13 | 3 | CERVICOVAGINITIS PURULENTA, CARCINOMA EPIDERMO... | 7829 |
# Strip every data column so only the FECHA index remains; each remaining
# row then stands for one record on that date.
unused_columns = ['EDAD', 'SEMANA', 'MES', 'RAZON', 'ACTA']
number_by_date = covid19.drop(columns=unused_columns)
number_by_date
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
FECHA |
---|
2020-03-18 |
2020-03-23 |
2020-03-26 |
2020-03-26 |
2020-03-26 |
... |
2020-05-12 |
2020-05-12 |
2020-05-12 |
2020-05-12 |
2020-05-12 |
4579 rows × 0 columns
# Add a placeholder column named MUERTES; its value is irrelevant, it only
# gives the later per-date count() a column to count.
number_by_date = number_by_date.assign(MUERTES=0)
number_by_date
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
MUERTES | |
---|---|
FECHA | |
2020-03-18 | 0 |
2020-03-23 | 0 |
2020-03-26 | 0 |
2020-03-26 | 0 |
2020-03-26 | 0 |
... | ... |
2020-05-12 | 0 |
2020-05-12 | 0 |
2020-05-12 | 0 |
2020-05-12 | 0 |
2020-05-12 | 0 |
4579 rows × 1 columns
# Keep only records dated on or after 2020-03-26, the same lower bound that
# is applied to the official data further down. Filtering on the date itself
# (instead of dropping whatever labels the first two rows happen to carry)
# does not depend on the file's row order and is safe to re-run.
number_by_date = number_by_date.loc[number_by_date.index >= '2020-03-26']

# Collapse to one row per date; MUERTES becomes the number of records that day.
number_by_date = number_by_date.groupby('FECHA').count()
number_by_date.head()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
MUERTES | |
---|---|
FECHA | |
2020-03-26 | 3 |
2020-03-27 | 2 |
2020-03-28 | 1 |
2020-03-29 | 2 |
2020-03-30 | 3 |
# Load the official case-level dataset (latin1-encoded CSV).
# NOTE(review): FECHA_DEF is requested as a date column, but the values shown
# further down still include the literal '9999-99-99' and the column is
# converted with pd.to_datetime later, so it appears to stay as strings here.
covid19_oficial = pd.read_csv(
    "200521COVID19MEXICO.csv",
    sep=",",
    encoding='latin1',
    parse_dates=["FECHA_DEF"],
)
covid19_oficial.head(10)
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
FECHA_ACTUALIZACION | ID_REGISTRO | ORIGEN | SECTOR | ENTIDAD_UM | SEXO | ENTIDAD_NAC | ENTIDAD_RES | MUNICIPIO_RES | TIPO_PACIENTE | ... | CARDIOVASCULAR | OBESIDAD | RENAL_CRONICA | TABAQUISMO | OTRO_CASO | RESULTADO | MIGRANTE | PAIS_NACIONALIDAD | PAIS_ORIGEN | UCI | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2020-05-21 | 11e989 | 2 | 3 | 27 | 2 | 27 | 27 | 4 | 2 | ... | 2 | 2 | 2 | 2 | 1 | 1 | 99 | México | 99 | 2 |
1 | 2020-05-21 | 1aad65 | 2 | 4 | 19 | 2 | 5 | 5 | 18 | 2 | ... | 2 | 2 | 2 | 2 | 99 | 1 | 99 | México | 99 | 1 |
2 | 2020-05-21 | 04f631 | 2 | 4 | 14 | 1 | 14 | 14 | 67 | 1 | ... | 2 | 2 | 2 | 2 | 99 | 1 | 99 | México | 99 | 97 |
3 | 2020-05-21 | 02556b | 2 | 4 | 15 | 1 | 15 | 15 | 110 | 2 | ... | 2 | 2 | 2 | 2 | 99 | 1 | 99 | México | 99 | 2 |
4 | 2020-05-21 | 0356d5 | 2 | 4 | 9 | 1 | 9 | 9 | 5 | 2 | ... | 2 | 2 | 2 | 1 | 99 | 1 | 99 | México | 99 | 2 |
5 | 2020-05-21 | 1d2dfb | 2 | 4 | 25 | 2 | 14 | 25 | 1 | 1 | ... | 2 | 2 | 2 | 2 | 99 | 1 | 99 | México | 99 | 97 |
6 | 2020-05-21 | 1b3e2b | 2 | 4 | 9 | 2 | 7 | 9 | 2 | 1 | ... | 2 | 2 | 2 | 2 | 99 | 1 | 99 | México | 99 | 97 |
7 | 2020-05-21 | 0c0eef | 2 | 4 | 21 | 1 | 21 | 21 | 114 | 2 | ... | 2 | 1 | 2 | 2 | 99 | 1 | 99 | México | 99 | 2 |
8 | 2020-05-21 | 043ea2 | 2 | 4 | 27 | 2 | 27 | 27 | 4 | 1 | ... | 2 | 2 | 2 | 2 | 99 | 1 | 99 | México | 99 | 97 |
9 | 2020-05-21 | 0bd39a | 2 | 4 | 8 | 1 | 8 | 8 | 17 | 1 | ... | 2 | 2 | 2 | 2 | 99 | 1 | 99 | México | 99 | 97 |
10 rows × 35 columns
# Reduce the official dataset to the state-of-residence code, indexed by
# the FECHA_DEF column.
temp = (
    covid19_oficial
    .loc[:, ['FECHA_DEF', 'ENTIDAD_RES']]
    .set_index('FECHA_DEF')
)
temp
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
ENTIDAD_RES | |
---|---|
FECHA_DEF | |
2020-04-27 | 27 |
2020-04-03 | 5 |
9999-99-99 | 14 |
2020-04-20 | 15 |
9999-99-99 | 9 |
... | ... |
2020-05-14 | 15 |
9999-99-99 | 26 |
9999-99-99 | 26 |
9999-99-99 | 15 |
9999-99-99 | 9 |
201838 rows × 1 columns
# Keep only rows whose residence code is 9.
# Number nine corresponds to CDMX as stated in the data dictionary.
# .copy() makes from_cdmx an independent frame, so the column changes applied
# to it afterwards do not raise SettingWithCopyWarning.
from_cdmx = temp.loc[temp['ENTIDAD_RES'] == 9].copy()
from_cdmx
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
ENTIDAD_RES | |
---|---|
FECHA_DEF | |
9999-99-99 | 9 |
9999-99-99 | 9 |
9999-99-99 | 9 |
9999-99-99 | 9 |
2020-03-22 | 9 |
... | ... |
2020-05-05 | 9 |
9999-99-99 | 9 |
9999-99-99 | 9 |
9999-99-99 | 9 |
9999-99-99 | 9 |
46594 rows × 1 columns
# ENTIDAD_RES has served its purpose as a filter; remove it so only the
# FECHA_DEF index is left. Rebinding the result (rather than inplace=True)
# avoids mutating a frame that was obtained as a slice of `temp`, which is
# what raised SettingWithCopyWarning. errors='ignore' keeps the cell safe to
# re-run once the column is already gone.
from_cdmx = from_cdmx.drop(columns=['ENTIDAD_RES'], errors='ignore')
C:\Users\User\Anaconda3\lib\site-packages\pandas\core\frame.py:3997: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
errors=errors,
# Display the frame: after dropping ENTIDAD_RES only the FECHA_DEF index remains.
from_cdmx
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
FECHA_DEF |
---|
9999-99-99 |
9999-99-99 |
9999-99-99 |
9999-99-99 |
2020-03-22 |
... |
2020-05-05 |
9999-99-99 |
9999-99-99 |
9999-99-99 |
9999-99-99 |
46594 rows × 0 columns
# Add a placeholder MUERTES column for the per-date count() that follows.
# assign() returns a new frame instead of writing into a possible slice of
# another DataFrame, so no SettingWithCopyWarning is raised.
from_cdmx = from_cdmx.assign(MUERTES=0)
C:\Users\User\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
"""Entry point for launching an IPython kernel.
# One row per FECHA_DEF value; MUERTES becomes the number of rows sharing
# that value (the '9999-99-99' placeholder forms a group of its own).
rows_per_date = from_cdmx.groupby(level='FECHA_DEF')
from_cdmx = rows_per_date.count()
from_cdmx.head()
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
</style>
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
MUERTES | |
---|---|
FECHA_DEF | |
2020-03-16 | 1 |
2020-03-22 | 1 |
2020-03-23 | 1 |
2020-03-25 | 1 |
2020-03-26 | 3 |
# Remove the '9999-99-99' placeholder group by its label. Dropping by label
# (instead of "the last row") cannot discard a real date if the placeholder
# is ever absent or does not sort last.
from_cdmx = from_cdmx.drop(index='9999-99-99', errors='ignore')

# Restrict to 2020-03-26 .. 2020-05-12 (inclusive) so both sources cover the
# same period. FECHA_DEF still holds ISO-formatted strings at this point, so
# the comparison against string bounds orders them chronologically.
in_window = (from_cdmx.index >= '2020-03-26') & (from_cdmx.index <= '2020-05-12')
from_cdmx = from_cdmx.loc[in_window]

# Sanity check: first and last date kept.
print(from_cdmx.head(1))
print(from_cdmx.tail(1))
MUERTES
FECHA_DEF
2020-03-26 3
MUERTES
FECHA_DEF
2020-05-12 60
# Convert FECHA_DEF to real datetimes and put it back as the index, so the
# frame can be sliced by date strings when plotting.
from_cdmx = (
    from_cdmx
    .reset_index()
    .assign(FECHA_DEF=lambda frame: pd.to_datetime(frame['FECHA_DEF']))
    .set_index('FECHA_DEF')
)
# Plot both daily series on a single axis for comparison.
sns.set(rc={'figure.figsize': (14, 7)})
start, end = '2020-03', '2020-05'
fig, ax = plt.subplots()

# (frame, legend label) pairs, drawn in this order with identical markers.
series_to_plot = (
    (from_cdmx, 'gob.mx'),
    (number_by_date, 'contralacorrupcion.mx'),
)
for frame, source_label in series_to_plot:
    ax.plot(
        frame.loc[start:end],
        marker='o',
        markersize=8,
        linestyle='-',
        label=source_label,
    )

ax.set(
    xlabel='Date',
    ylabel='Number of Deaths',
    title='COVID-19 death comparison in CDMX',
)
ax.legend()
<matplotlib.legend.Legend at 0x1946b7e5a58>