GitHub - alejandropuerto/covid-19-death-comparison

# Libraries

import datetime
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from dateutil.parser import parse
from pandas import Series

Data from contralacorrupcion.mx

# Load the first dataset: FECHA is parsed as dates and used as the row index.
covid19 = pd.read_csv(
    "actas-defuncion-covid-19-cdmx1.csv",
    parse_dates=['FECHA'],
    index_col="FECHA",
)
covid19.head()

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	EDAD	SEMANA	MES	RAZON	ACTA
FECHA
2020-03-18	41 AÑOS	12	3	CHOQUE SEPTICO, NEUMONIA POR COVID 19 POR SARS...	7129
2020-03-23	61 AÑOS	13	3	INSUFICIENCIA RESPIRATORIA AGUDA, NEUMONIA VIR...	4459
2020-03-26	60 AÑOS	13	3	SINDROME DE INSUFICIENCIA RESPIRATORIA AGUDA, ...	4591
2020-03-26	37 AÑOS	13	3	NEUMONIA POR CORONAVIRUS	7879
2020-03-26	63 AÑOS	13	3	CERVICOVAGINITIS PURULENTA, CARCINOMA EPIDERMO...	7829

# Remove every data column so that only the FECHA index is left
# (still one row per original record).
unused_columns = ['EDAD', 'SEMANA', 'MES', 'RAZON', 'ACTA']
number_by_date = covid19.drop(columns=unused_columns)
number_by_date

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>


FECHA
2020-03-18
2020-03-23
2020-03-26
2020-03-26
2020-03-26
...
2020-05-12
2020-05-12
2020-05-12
2020-05-12
2020-05-12

4579 rows × 0 columns

# Add a constant placeholder column named MUERTES; the value itself does not
# matter, the column only exists so rows can be counted per date afterwards.
number_by_date = number_by_date.assign(MUERTES=0)

number_by_date

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	MUERTES
FECHA
2020-03-18	0
2020-03-23	0
2020-03-26	0
2020-03-26	0
2020-03-26	0
...	...
2020-05-12	0
2020-05-12	0
2020-05-12	0
2020-05-12	0
2020-05-12	0

4579 rows × 1 columns

# Discard the dates of the first two rows. DataFrame.drop works by label, so
# every row sharing one of those two FECHA values is removed.
leading_dates = number_by_date.head(2).index
number_by_date.drop(leading_dates, inplace=True)

# Collapse to one row per date: MUERTES becomes the number of records
# registered on that date.
rows_by_date = number_by_date.groupby('FECHA')
number_by_date = rows_by_date.count()
number_by_date.head()

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	MUERTES
FECHA
2020-03-26	3
2020-03-27	2
2020-03-28	1
2020-03-29	2
2020-03-30	3

Data from gob.mx

# Load the second dataset, asking pandas to parse FECHA_DEF as dates.
# NOTE(review): the rendered output shows 'MÃ©xico' in PAIS_NACIONALIDAD,
# which suggests the file may really be UTF-8 rather than latin1 - confirm
# before changing the encoding.
# NOTE(review): FECHA_DEF is displayed with '9999-99-99' entries, so the
# column presumably stays as plain strings despite parse_dates - verify the
# resulting dtype.
covid19_oficial = pd.read_csv(
    "200521COVID19MEXICO.csv",
    sep=",",
    parse_dates=["FECHA_DEF"],
    encoding='latin1',
)
covid19_oficial.head(10)

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	FECHA_ACTUALIZACION	ID_REGISTRO	ORIGEN	SECTOR	ENTIDAD_UM	SEXO	ENTIDAD_NAC	ENTIDAD_RES	MUNICIPIO_RES	TIPO_PACIENTE	...	CARDIOVASCULAR	OBESIDAD	RENAL_CRONICA	TABAQUISMO	OTRO_CASO	RESULTADO	MIGRANTE	PAIS_NACIONALIDAD	PAIS_ORIGEN	UCI
0	2020-05-21	11e989	2	3	27	2	27	27	4	2	...	2	2	2	2	1	1	99	MÃ©xico	99	2
1	2020-05-21	1aad65	2	4	19	2	5	5	18	2	...	2	2	2	2	99	1	99	MÃ©xico	99	1
2	2020-05-21	04f631	2	4	14	1	14	14	67	1	...	2	2	2	2	99	1	99	MÃ©xico	99	97
3	2020-05-21	02556b	2	4	15	1	15	15	110	2	...	2	2	2	2	99	1	99	MÃ©xico	99	2
4	2020-05-21	0356d5	2	4	9	1	9	9	5	2	...	2	2	2	1	99	1	99	MÃ©xico	99	2
5	2020-05-21	1d2dfb	2	4	25	2	14	25	1	1	...	2	2	2	2	99	1	99	MÃ©xico	99	97
6	2020-05-21	1b3e2b	2	4	9	2	7	9	2	1	...	2	2	2	2	99	1	99	MÃ©xico	99	97
7	2020-05-21	0c0eef	2	4	21	1	21	21	114	2	...	2	1	2	2	99	1	99	MÃ©xico	99	2
8	2020-05-21	043ea2	2	4	27	2	27	27	4	1	...	2	2	2	2	99	1	99	MÃ©xico	99	97
9	2020-05-21	0bd39a	2	4	8	1	8	8	17	1	...	2	2	2	2	99	1	99	MÃ©xico	99	97

10 rows × 35 columns

# Keep just the FECHA_DEF and ENTIDAD_RES columns, and index the rows by
# FECHA_DEF.
wanted_columns = ['FECHA_DEF', 'ENTIDAD_RES']
temp = covid19_oficial[wanted_columns].set_index('FECHA_DEF')

temp

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	ENTIDAD_RES
FECHA_DEF
2020-04-27	27
2020-04-03	5
9999-99-99	14
2020-04-20	15
9999-99-99	9
...	...
2020-05-14	15
9999-99-99	26
9999-99-99	26
9999-99-99	15
9999-99-99	9

201838 rows × 1 columns

# Keep only the rows whose ENTIDAD_RES is 9; number nine corresponds to CDMX
# as stated in the data dictionary.
# The explicit .copy() makes from_cdmx an independent DataFrame instead of a
# slice of temp, so modifying it afterwards does not raise
# SettingWithCopyWarning.
from_cdmx = temp.loc[temp['ENTIDAD_RES'] == 9].copy()

from_cdmx

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	ENTIDAD_RES
FECHA_DEF
9999-99-99	9
9999-99-99	9
9999-99-99	9
9999-99-99	9
2020-03-22	9
...	...
2020-05-05	9
9999-99-99	9
9999-99-99	9
9999-99-99	9
9999-99-99	9

46594 rows × 1 columns

# Only the FECHA_DEF index is needed from here on. Rebind to the frame
# returned by drop() rather than mutating in place: an in-place drop on a
# frame obtained by slicing another one emits SettingWithCopyWarning.
# errors='ignore' keeps the statement harmless if the column is already gone.
from_cdmx = from_cdmx.drop(columns=['ENTIDAD_RES'], errors='ignore')

C:\Users\User\Anaconda3\lib\site-packages\pandas\core\frame.py:3997: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,

from_cdmx

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>


FECHA_DEF
9999-99-99
9999-99-99
9999-99-99
9999-99-99
2020-03-22
...
2020-05-05
9999-99-99
9999-99-99
9999-99-99
9999-99-99

46594 rows × 0 columns

# Add a constant placeholder column named MUERTES so rows can be counted per
# date. assign() returns a new DataFrame, which avoids the
# SettingWithCopyWarning that direct column assignment on a sliced frame
# produces.
from_cdmx = from_cdmx.assign(MUERTES=0)

C:\Users\User\Anaconda3\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.

# Collapse to one row per FECHA_DEF value: MUERTES becomes the number of
# records that share that value.
rows_by_death_date = from_cdmx.groupby('FECHA_DEF')
from_cdmx = rows_by_death_date.count()

from_cdmx.head()

.dataframe tbody tr th {
    vertical-align: top;
}

.dataframe thead th {
    text-align: right;
}

</style>

	MUERTES
FECHA_DEF
2020-03-16	1
2020-03-22	1
2020-03-23	1
2020-03-25	1
2020-03-26	3

# Remove the '9999-99-99' placeholder date by its label instead of by
# position: dropping "the last row" would silently delete a real date if the
# placeholder were missing or not sorted last.
from_cdmx = from_cdmx.drop(index='9999-99-99', errors='ignore')

# Restrict the series to 2020-03-26 .. 2020-05-12 (both ends inclusive).
from_cdmx = from_cdmx.reset_index()

in_window = from_cdmx['FECHA_DEF'].between('2020-03-26', '2020-05-12')
from_cdmx = from_cdmx.loc[in_window]

from_cdmx = from_cdmx.set_index('FECHA_DEF')

# Show the first and the last row to check the bounds of the filtered range.
print(from_cdmx.iloc[:1])
print(from_cdmx.iloc[-1:])

            MUERTES
FECHA_DEF          
2020-03-26        3
            MUERTES
FECHA_DEF          
2020-05-12       60

# Convert FECHA_DEF to real datetimes and restore it as the index, so the
# frame can be sliced by date when plotting.
from_cdmx = (
    from_cdmx.reset_index()
    .assign(FECHA_DEF=lambda frame: pd.to_datetime(frame['FECHA_DEF']))
    .set_index('FECHA_DEF')
)

Visualization

# Figure size applied through seaborn's rc settings.
sns.set(rc={'figure.figsize': (14, 7)})

# Date window used to slice both series.
start, end = '2020-03', '2020-05'

# Both lines share the same marker and line styling; only the label differs.
line_style = dict(marker='o', markersize=8, linestyle='-')

fig, ax = plt.subplots()
ax.plot(from_cdmx.loc[start:end], label='gob.mx', **line_style)
ax.plot(number_by_date.loc[start:end], label='contralacorrupcion.mx', **line_style)
ax.set(
    xlabel='Date',
    ylabel='Number of Deaths',
    title='COVID-19 death comparison in CDMX',
)
ax.legend()

<matplotlib.legend.Legend at 0x1946b7e5a58>

Name		Name	Last commit message	Last commit date
Latest commit History 8 Commits
README.md		README.md
covid-19 death comparison cdmx.ipynb		covid-19 death comparison cdmx.ipynb
plot.png		plot.png

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Repository files navigation

Data from contralacorrupcion.mx

Data from gob.mx

Visualization

About

Releases

Packages

Languages

alejandropuerto/covid-19-death-comparison

Folders and files

Latest commit

History

Repository files navigation

Data from contralacorrupcion.mx

Data from gob.mx

Visualization

About

Topics

Resources

Stars

Watchers

Forks

Releases

Packages 0

Languages

Packages