-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathitalyvaccination (1).py
1641 lines (1221 loc) · 57 KB
/
italyvaccination (1).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# -*- coding: utf-8 -*-
"""italyvaccination.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/gist/leanerr/1702cf51c7413ad9cb577eacca7da34e/italyvaccination.ipynb
"""
import numpy as np
import pandas as pd
# Specify the path to your CSV file
# NOTE(review): absolute Colab-style path; a later cell re-reads the same file name via a relative path.
csv_file_path = '/content/italian_vaccination.csv'
# Read CSV using pandas
df = pd.read_csv(csv_file_path)
# Display the pandas DataFrame
print("DataFrame from pandas:")
print(df)
# Access data using NumPy (if needed)
numpy_array = df.to_numpy()
print("\nNumPy array from pandas DataFrame:")
print(numpy_array)
# The bare expressions below only display something in an interactive/notebook session;
# when run as a plain script their results are discarded.
df.head(1000)
df.tail()
df.shape
import missingno as msno
import matplotlib.pyplot as plt
# Count the null entries of every column and print the tally.
print("\nMissing values in the dataset:")
missing_per_column = df.isnull().sum()
print(missing_per_column)
# Draw missingno's matrix view of the frame, then print the dtype / non-null summary.
msno.matrix(df)
plt.title("Missing Values Matrix")
plt.show()
df.info()
# Report which numeric columns contain negative values and plot those values.
print("\nColumns with negative values:")
columns_with_negatives = []
for column in df.columns:
    # Only numeric columns can be meaningfully compared against zero.
    if pd.api.types.is_numeric_dtype(df[column]):
        negative_values_count = (df[column] < 0).sum()
        if negative_values_count > 0:
            print(f"{column}: {negative_values_count} negative values")
            columns_with_negatives.append(column)
# Visualize negative values with a histogram
numeric_df = df.select_dtypes(include=['number'])
if columns_with_negatives:
    # Mask out non-negative entries (they become NaN and are ignored by hist)
    # and plot only the columns that actually have something to show.
    negative_part = df[columns_with_negatives]
    negative_part[negative_part < 0].hist(bins=20, figsize=(10, 6))
    plt.suptitle("Histogram of Negative Values")
    plt.show()
else:
    # Without this guard an empty panel is drawn for every numeric column.
    print("No negative values found; skipping histogram.")
# Number of distinct values per column (displayed only in a notebook).
df.nunique()
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the dataset
# NOTE(review): this rebinds df from a relative path, whereas the first load used '/content/italian_vaccination.csv'.
df = pd.read_csv('italian_vaccination.csv')
# Columns included in the correlation analysis.
numerical_features = ['first_dose', 'second_dose', 'males', 'females','previous_infection', 'additional_booster_dose', 'second_booster', 'db3']
# Explore correlations between numerical features
correlation_matrix = df[numerical_features].corr()
plt.figure(figsize=(12, 10))
# Annotated heatmap; each cell shows the coefficient formatted to two decimals.
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Matrix')
plt.show()
# Plotly must be installed before the plotting cells below can import it.
# The notebook did this with an IPython shell escape, which is not valid
# Python syntax in an exported .py file, so it is kept here as a comment:
#   !pip install plotly
# Row counts per region and per supplier (displayed only in a notebook).
df.region.value_counts()
df.supplier.value_counts()
import seaborn as sns
import matplotlib.pyplot as plt
# Overall dose counts per gender (the bare expressions display only in a notebook).
df.females.sum(), df.males.sum()
total_vaccinations = df.males.sum() + df.females.sum()
total_vaccinations
print("Number of Vaccination for Gender")
# One row per gender with a single "Total" column. The index already carries
# the gender labels, so the former `tot.gender = [...]` assignment is dropped:
# it only set a Python attribute on the frame (pandas warns about this) and
# never created a column.
tot = pd.DataFrame(
    {"Total": np.array([df.males.sum(), df.females.sum()])},
    index=["Males", "Females"],
)
tot
import matplotlib.pyplot as plt
import pandas as pd
# Two-bar chart of the total doses recorded for males and for females.
# Reads the 'males' and 'females' columns of df.
total_male = df['males'].sum()
total_female = df['females'].sum()
gender_labels = ['Males', 'Females']
gender_totals = [total_male, total_female]
plt.figure(figsize=(8, 6))
plt.bar(gender_labels, gender_totals, color=['blue', 'red'])
plt.title('Total Number of Vaccinations by Gender')
plt.xlabel('Gender')
plt.ylabel('Total')
plt.show()
"""
Of course, we have to weight this data by the actual gender distribution of the population in Italy. The site below gives 28,749,359 for the male population and 30,101,358 for the female population currently registered in Italy.
You can find this data at this site: 28864088 https://www.statista.com/statistics/786485/population-by-gender-in-italy/
"""
# Registered population per gender, listed in the same order as tot's rows (Males, Females).
tot["Registered"]=[28749359,30101358]
# Recorded doses divided by registered population, per gender.
tot["Prop"]=tot.Total/tot.Registered
tot
import pandas as pd
# Male/female dose totals per (administration_date, region_name), ordered by
# the male total (largest first), exported to CSV and printed.
gender_by_date_region = df.groupby(["administration_date", "region_name"])[["males", "females"]].sum()
RegionOverTimeMaleFemale = pd.DataFrame(
    gender_by_date_region.sort_values(by="males", ascending=False)
)
RegionOverTimeMaleFemale.to_csv('RegionOverTimeMaleFemale.csv', index=True)
print("Number of Vaccination for Region on male and female population:")
print(RegionOverTimeMaleFemale)
print("Number of Vaccination for Region on male population")
# Same aggregation over the whole period: one row per region.
gender_by_region = df.groupby("region_name")[["males", "females"]].sum()
RegionAllTimeMaleFemale = pd.DataFrame(
    gender_by_region.sort_values(by="males", ascending=False)
)
RegionAllTimeMaleFemale.to_csv('RegionAllTimeMaleFemale.csv', index=True)
RegionAllTimeMaleFemale
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Horizontal stacked bars: one bar per region, split into male and female totals.
fig = make_subplots(rows=1, cols=1)
for gender_column, trace_name in (('males', 'Males'), ('females', 'Females')):
    fig.add_trace(
        go.Bar(
            x=RegionAllTimeMaleFemale[gender_column],
            y=RegionAllTimeMaleFemale.index,
            orientation='h',
            name=trace_name,
        )
    )
# Titles and stacking mode.
fig.update_layout(
    title_text='Number of Vaccinations by Region and Gender',
    xaxis_title='Count',
    yaxis_title='Region Name',
    barmode='stack',
)
fig.show()
# Sum every dose-type column per administration date.
# The columns are selected with a list: selecting several columns from a
# groupby with a bare tuple was deprecated and raises in pandas 2.x.
DosesOverTime_df = df.groupby("administration_date")[
    ['first_dose', 'second_dose', 'previous_infection', 'additional_booster_dose', 'second_booster', 'db3']
].sum()
# Save the result as a new CSV file
DosesOverTime_df.to_csv('DosesOverTime_df.csv', index=True)
DosesOverTime_df
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Line chart with one trace per dose-type column of DosesOverTime_df,
# plotted against the frame's index (administration date).
# Custom palette with bold colours; it is now actually applied to the traces
# (previously it was defined but never used).
custom_palette = ["#3498db", "#2ecc71", "#e74c3c", "#f39c12", "#9b59b6", "#34495e"]
fig = make_subplots(rows=1, cols=1)
for position, column in enumerate(DosesOverTime_df.columns):
    # Wrap around the palette in case there are more columns than colours.
    trace_colour = custom_palette[position % len(custom_palette)]
    fig.add_trace(
        go.Scatter(
            x=DosesOverTime_df.index,
            y=DosesOverTime_df[column],
            mode='lines',
            name=column,
            line=dict(width=2.5, color=trace_colour),
        )
    )
# The plotted values are per-date sums, not running totals, so the y-axis is
# labelled 'Count' (as in the other figures) rather than 'Cumulative Count'.
fig.update_layout(
    title_text='Number of Doses Administered Over Time',
    xaxis_title='Administration Date',
    yaxis_title='Count',
    legend_title='Dose Type',
)
fig.show()
# Per-date male/female sums (displayed only in a notebook). Columns are
# selected with a list; a bare tuple here raises in pandas 2.x.
df.groupby("administration_date")[['males', 'females']].sum()
df
# Row-wise total of male and female doses.
df["dailytotal"] = df.males + df.females
df.head()
# Per-date male and female sums plus their total, exported to two CSV files.
grouped_by_date = df.groupby("administration_date")
dictionary = {
    gender_column: grouped_by_date[gender_column].sum()
    for gender_column in ("males", "females")
}
daily_gender = pd.DataFrame(dictionary)
daily_gender["Total"] = daily_gender.males + daily_gender.females
daily_gender.to_csv('daily_genderTotal.csv', index=True)
daily_gender
# The same frame is written a second time under another file name.
daily_gender.to_csv("dailyTotalMaleFemale.csv")
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Line chart with one trace per column of daily_gender (males, females, Total),
# plotted against the frame's index (administration date).
# Custom palette with bold colours; it is now actually applied to the traces
# (previously it was defined but never used).
custom_palette = ["#3498db", "#2ecc71", "#e74c3c", "#f39c12"]
fig = make_subplots(rows=1, cols=1)
for position, column in enumerate(daily_gender.columns):
    # Wrap around the palette in case there are more columns than colours.
    trace_colour = custom_palette[position % len(custom_palette)]
    fig.add_trace(
        go.Scatter(
            x=daily_gender.index,
            y=daily_gender[column],
            mode='lines',
            name=column,
            line=dict(width=2.5, color=trace_colour),
        )
    )
# The plotted values are per-date sums, not running totals, so the y-axis is
# labelled 'Count' rather than 'Cumulative Count'.
fig.update_layout(
    title_text='Number of Vaccinations by Gender Over Time',
    xaxis_title='Administration Date',
    yaxis_title='Count',
    legend_title='Gender',
)
fig.show()
# Bare expression: shows the age_range column only in a notebook.
df.age_range
print("Number of Vaccination for Age in class for the male population")
# Male doses summed per age range, largest first; the result is neither stored nor printed.
df.groupby("age_range")["males"].sum().sort_values(ascending = False)
import pandas as pd
import matplotlib.pyplot as plt
# Bar chart of male doses per age range, bars ordered from largest to smallest.
male_totals_by_age = df.groupby("age_range")["males"].sum()
MaleVaccinationByAge = male_totals_by_age.sort_values(ascending=False).reset_index()
# Static matplotlib figure (no interactive features).
plt.figure(figsize=(12, 8))
plt.bar(
    MaleVaccinationByAge['age_range'],
    MaleVaccinationByAge['males'],
    color='skyblue',
)
plt.title('Number of Vaccinations for Each Age Range in the Male Population')
plt.xlabel('Age Range')
plt.ylabel('Number of Vaccinations')
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
# Bar chart of female doses per age range, bars ordered from largest to smallest.
female_totals_by_age = df.groupby("age_range")["females"].sum()
femaleVaccinationByAge = female_totals_by_age.sort_values(ascending=False).reset_index()
# Static matplotlib figure (no interactive features).
plt.figure(figsize=(12, 8))
plt.bar(
    femaleVaccinationByAge['age_range'],
    femaleVaccinationByAge['females'],
    color='red',
)
plt.title('Number of Vaccinations for Each Age Range in the female Population')
plt.xlabel('Age Range')
plt.ylabel('Number of Vaccinations')
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Side-by-side bars of male and female doses for every age range.
# Group by "age_range" and sum the doses for males and females,
# ordered by the male total (largest first).
VaccinationByAge = df.groupby("age_range")[["males", "females"]].sum().sort_values(by="males", ascending=False)
# Set Seaborn's "deep" color palette for distinct colors
sns.set_palette("deep")
# Reshape to long format so both genders are drawn in ONE barplot call with
# `hue`. Two separate barplot calls on the same axes paint the second series
# directly over the first, hiding whichever bar is shorter.
age_gender_long = (
    VaccinationByAge
    .rename(columns={"males": "Males", "females": "Females"})
    .reset_index()
    .melt(id_vars="age_range", var_name="Gender", value_name="Count")
)
# Plotting
plt.figure(figsize=(12, 8))
sns.barplot(
    data=age_gender_long,
    x="age_range",
    y="Count",
    hue="Gender",
    palette={"Males": "skyblue", "Females": "red"},
)
plt.title('Number of Vaccinations for Each Age Range')
plt.xlabel('Age Range')
plt.ylabel('Count')
plt.legend()
plt.show()
# Share (in percent, two decimals) of all vaccinations that falls in each age range.
print(((df.groupby("age_range")["dailytotal"].sum().sort_values()/total_vaccinations)*100).round(2))
# Per-supplier share of all male / all female doses, largest first.
male = pd.DataFrame(df.groupby("supplier")["males"].sum().sort_values(ascending=False) / df.males.sum())
female = pd.DataFrame(df.groupby("supplier")["females"].sum().sort_values(ascending=False) / df.females.sum())
# Per-supplier sums of the numeric columns only. Without numeric_only=True,
# pandas 2.x also "sums" the text columns (dates, region names, ...) by
# concatenating their strings, which is slow and meaningless.
supplier_tot = pd.DataFrame(df.groupby("supplier").sum(numeric_only=True))
supplier_tot.to_csv('supplier_total.csv', index=True)
supplier_tot
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Add 'males' and 'females' for each record and create a new column 'total'
supplier_tot['total'] = supplier_tot['males'] + supplier_tot['females']
# Write supplier_total.csv again so the file also contains the new 'total' column.
supplier_tot.to_csv('supplier_total.csv', index=True)
# Display the updated DataFrame
supplier_tot
import pandas as pd
import plotly.express as px
# Interactive bar chart (Plotly Express) of male and female doses per supplier.
# supplier_tot is grouped by "supplier" again and then flattened so that
# 'supplier' becomes a regular column.
supplier_gender_sums = supplier_tot.groupby("supplier")[["males", "females"]].sum()
SupplierVaccination = supplier_gender_sums.reset_index()
fig = px.bar(
    SupplierVaccination,
    x='supplier',
    y=['males', 'females'],
    labels={'value': 'Count', 'variable': 'Gender'},
    color_discrete_sequence=['#3498db', '#e74c3c'],
    title='Vaccination Count by Supplier',
)
fig.show()
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Stacked bar chart of male/female doses per supplier on a logarithmic y-axis.
SupplierVaccination = supplier_tot.groupby("supplier")[["males", "females"]].sum()
# Three-colour "husl" palette registered as seaborn's current palette.
custom_palette = sns.color_palette("husl", 3)
sns.set_palette(custom_palette)
fig = make_subplots(rows=1, cols=1)
# One bar trace per gender column.
for gender, counts in SupplierVaccination.items():
    fig.add_trace(go.Bar(x=SupplierVaccination.index, y=counts, name=gender))
# Titles, stacking mode and the log-scaled y-axis in a single layout update.
fig.update_layout(
    title_text='Vaccination Count by Supplier (log scale)',
    xaxis_title='Supplier',
    yaxis_title='Count (log scale)',
    barmode='stack',
    yaxis_type='log',
)
fig.show()
# Build one long frame (genderdf) with per-supplier totals for both genders.
# massi is a second name for the SAME object as male (no copy), so every change below also changes male.
massi=male
massi["gender"]="male"
massi.columns=["tot","gender"]
massi
# Same labelling and column renaming for the female frame.
female["gender"]="female"
female.columns=["tot","gender"]
female
#male.index.sort_values()
# Absolute per-supplier sums for each gender.
supp_man=pd.Series(df.groupby("supplier")["males"].sum())
supp_women=pd.Series(df.groupby("supplier")["females"].sum())
supp_women
supplier_gender={"man":supp_man,"woman":supp_women}
# Side-by-side frame of both series; the result is not assigned, so it is only displayed in a notebook.
pd.DataFrame(supplier_gender,index=male.index.sort_values())
# Overwrite the existing "tot" column (until here a share of the gender total) with the absolute sums.
massi.tot=df.groupby("supplier")["males"].sum().sort_values(ascending = False)
female.tot=df.groupby("supplier")["females"].sum().sort_values(ascending = False)
# Stack the male and female frames and expose the supplier index as a regular column.
genderdf=[male,female]
genderdf=pd.concat(genderdf)
genderdf["supplier"]=genderdf.index
genderdf
# Per-region sums of the total and of every dose-type / gender column, exported to CSV.
df["dailytotal"] = df.males + df.females
region_sum_columns = [
    'dailytotal', 'first_dose', 'second_dose', 'previous_infection',
    'additional_booster_dose', 'second_booster', 'db3', "males", "females",
]
reg_name = pd.DataFrame(df.groupby("region_name")[region_sum_columns].sum())
reg_name.to_csv("RegionsDosesTotal.csv")
#reg_name["pop_resid"]=regions["pop_resid"]
#reg_name["propTV"]=reg_name["TotVaccine"]/reg_name["pop_resid"]
# Copy of the table without the two autonomous provinces.
reg_name1 = reg_name.drop(
    index=["Provincia Autonoma Bolzano / Bozen", "Provincia Autonoma Trento"]
)
reg_name
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Stacked bar chart: for every supplier, the summed count of each dose type.
# Row-wise total of male and female doses.
df["dailytotal"] = df["males"] + df["females"]
supplier_groups = df.groupby("supplier")
SupplierDoses = supplier_groups[['first_dose', 'second_dose', 'previous_infection', 'additional_booster_dose', 'second_booster', 'db3']].sum()
# Seaborn's "deep" palette becomes the current palette.
sns.set_palette("deep")
fig = make_subplots(rows=1, cols=1)
# One bar trace per dose-type column.
for dose_type, counts in SupplierDoses.items():
    fig.add_trace(go.Bar(x=SupplierDoses.index, y=counts, name=dose_type))
fig.update_layout(
    title_text='Number of Doses by Supplier',
    xaxis_title='Supplier',
    yaxis_title='Count',
    barmode='stack',
)
fig.show()
import pandas as pd
import plotly.express as px
# Plotly Express bar chart of dose counts per supplier, coloured by dose type,
# on a logarithmic y-axis.
# Row-wise total of male and female doses.
df["dailytotal"] = df["males"] + df["females"]
# Per-supplier sums, with 'supplier' turned back into an ordinary column.
SupplierDoses = df.groupby("supplier")[['first_dose', 'second_dose', 'previous_infection', 'additional_booster_dose', 'second_booster', 'db3']].sum()
SupplierDoses = SupplierDoses.reset_index()
# Long format: one row per (supplier, dose type).
SupplierDoses_melted = SupplierDoses.melt(
    id_vars=['supplier'],
    var_name='Dose Type',
    value_name='Count',
)
fig = px.bar(
    SupplierDoses_melted,
    x='supplier',
    y='Count',
    color='Dose Type',
    labels={'Count': 'Number of Doses (log scale)'},
    title='Number of Doses by Supplier (log scale)',
    log_y=True,
)
fig.show()
df.head()
# NOTE(review): the original author flagged this cell as "not working"; the cause is not evident from the code shown here.
import pandas as pd
import plotly.express as px
# Assuming you have a DataFrame 'df' with columns 'region_name', 'first_dose', 'second_dose', 'previous_infection', 'additional_booster_dose', 'second_booster', 'db3'
# df = ...
# Sum the doses for each region
df_summed = df.groupby('region_name').agg({
'first_dose': 'sum',
'second_dose': 'sum',
'previous_infection': 'sum',
'additional_booster_dose': 'sum',
'second_booster': 'sum',
'db3': 'sum'
}).reset_index()
# Melt the DataFrame to create a long-format for Plotly Express
# (one row per region and dose type, with the value in 'Count').
df_melted = pd.melt(df_summed, id_vars=['region_name'], value_vars=['first_dose', 'second_dose', 'previous_infection', 'additional_booster_dose', 'second_booster', 'db3'],
var_name='Dose Type', value_name='Count')
# Create an interactive horizontal bar plot using Plotly Express
fig = px.bar(df_melted, x='Count', y='region_name', color='Dose Type',
labels={'Count': 'Count', 'Dose Type': 'Dose Type'},
title='Distribution of Vaccination Doses by Region',
orientation='h', barmode='stack') # Set orientation='h' for horizontal and barmode='stack' for stacked bars
# Show the plot
fig.show()
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Assuming you have a DataFrame 'df' with columns 'region_name', 'first_dose', 'second_dose', 'dailytotal', etc.
# df = ...
# Get the top 4 regions based on 'dailytotal'
top_regions = df.groupby('region_name')['dailytotal'].sum().sort_values(ascending=False).head(4).index
# Filter DataFrame for the top 4 regions
df_top_regions = df[df['region_name'].isin(top_regions)]
# Melt the DataFrame for Seaborn compatibility
# (the raw rows are melted, not per-region sums, so each region/dose type has many rows).
df_melted = pd.melt(df_top_regions, id_vars=['region_name'], value_vars=['first_dose', 'second_dose', 'previous_infection', 'additional_booster_dose', 'second_booster', 'db3'],
var_name='Dose Type', value_name='Count')
# Set Seaborn's "deep" color palette for distinct colors
sns.set_palette("deep")
# Plotting
plt.figure(figsize=(12, 8))
# NOTE(review): no estimator is passed, so each bar presumably shows seaborn's default aggregate (mean per row) rather than a total; confirm this is intended.
sns.barplot(x='region_name', y='Count', hue='Dose Type', data=df_melted, ci=None) # Remove confidence intervals
plt.title('Distribution of Vaccination Doses by Top 4 Regions (Based on dailytotal)')
plt.xlabel('Region Name')
plt.ylabel('Count')
# Anchor the legend at the upper-right corner of the axes so it sits outside the plot area.
plt.legend(title='Dose Type', bbox_to_anchor=(1, 1), loc='upper left')
plt.show()
# NOTE(review): the original author flagged this cell as "not working"; the cause is not evident from the code shown here.
import pandas as pd
import plotly.express as px
# Assuming you have a DataFrame 'df' with columns 'region_name' and 'supplier'
# df = ...
# Count the occurrences of each supplier in each region
# (size() counts rows of df, not administered doses).
supplier_region_counts = df.groupby(['region_name', 'supplier']).size().reset_index(name='Count')
# Create an interactive horizontal bar plot using Plotly Express
fig = px.bar(supplier_region_counts, x='Count', y='region_name', color='supplier',
labels={'Count': 'Occurrences'},
title='Distribution of Suppliers by Region',
orientation='h') # Set orientation='h' for horizontal
# Show the plot
fig.show()
import pandas as pd
import plotly.express as px
# Horizontal bars per region, coloured by supplier, where the bar length is the
# summed 'dailytotal' of that (region, supplier) pair.
dailytotal_by_region_supplier = df.groupby(['region_name', 'supplier'])['dailytotal'].sum()
supplier_region_weighted_counts = dailytotal_by_region_supplier.reset_index(name='Weighted_Count')
fig = px.bar(
    supplier_region_weighted_counts,
    x='Weighted_Count',
    y='region_name',
    color='supplier',
    labels={'Weighted_Count': 'Weighted Occurrences'},
    title='Distribution of Suppliers by Region (Weighted by dailytotal)',
    orientation='h',
)
fig.show()
import pandas as pd
import plotly.express as px
# Line chart of vaccination counts over time, one line per supplier.
# df holds several rows per date and supplier (other cells group it by region
# and age range), so the rows are first summed per (date, supplier). Plotting
# the raw rows makes each line jump back and forth within the same date.
daily_by_supplier = (
    df.groupby(['administration_date', 'supplier'])['dailytotal']
    .sum()
    .reset_index()
)
# Create an interactive line plot using Plotly Express
fig = px.line(
    daily_by_supplier,
    x='administration_date',
    y='dailytotal',
    color='supplier',
    labels={'dailytotal': 'Daily Vaccination Count'},
    title='Vaccination Counts Over Time by Supplier',
)
# Show the plot
fig.show()
import pandas as pd
import plotly.express as px
# Bar chart of the overall number of doses for each dose-type column.
dose_sums = df[['first_dose', 'second_dose', 'previous_infection', 'additional_booster_dose', 'second_booster', 'db3']].sum()
# Two-column frame for plotting: the column name as 'Dose Type', its sum as 'Count'.
df_plot = dose_sums.rename_axis('Dose Type').reset_index(name='Count')
fig = px.bar(
    df_plot,
    x='Dose Type',
    y='Count',
    labels={'Count': 'Total Number of Doses'},
    title='Total Number of Doses by Dose Type',
)
fig.show()
import numpy as np
import pandas as pd
# Load the dataset into a Pandas DataFrame
# NOTE: this rebinds the global df, so from here on df no longer refers to the vaccination data.
file_path = 'ita_reg_ann_data.csv'
df = pd.read_csv(file_path)
# Print the columns
print("Columns in the dataset:")
print(df.columns)
# Preview of the first rows (displayed only in a notebook).
df.head()
import pandas as pd
# Load the datasets
italian_vaccination = pd.read_csv('italian_vaccination.csv')
ita_reg_ann_data = pd.read_csv('ita_reg_ann_data.csv')
# Extract relevant columns from ita_reg_ann_data
columns_to_add = ['cod_reg', 'den_reg', 'pop_resid', 'gdp_tot', 'dens_ab']
data_to_add = ita_reg_ann_data[columns_to_add]
# Merge datasets based on 'cod_reg' and 'den_reg'
# Left join: every vaccination row is kept; rows without a matching region get NaN in the added columns.
# NOTE(review): if ita_reg_ann_data holds more than one row per (cod_reg, den_reg), e.g. one per year,
# this merge duplicates vaccination rows; confirm the regional table is unique on these keys.
merged_data = pd.merge(italian_vaccination, data_to_add, how='left', left_on=['ISTAT_regional_code', 'region_name'],
right_on=['cod_reg', 'den_reg'])
# Drop duplicate columns and unnecessary ones
columns_to_drop = ['cod_reg', 'den_reg']
merged_data.drop(columns=columns_to_drop, inplace=True)
# Save the merged dataset
merged_data.to_csv('italian_vaccination_mergedPopGDP.csv', index=False)
# Preview of the last rows (displayed only in a notebook).
merged_data.tail()
import numpy as np
import pandas as pd
# Load the dataset into a Pandas DataFrame
# NOTE: rebinds the global df to the contents of ita_reg_mens_clima.csv.
file_path = '/content/ita_reg_mens_clima.csv'
df = pd.read_csv(file_path)
# Print the columns
print("Columns in the dataset:")
print(df.columns)
# Preview of the first rows (displayed only in a notebook).
df.head()
import numpy as np
import pandas as pd
# Load the dataset into a Pandas DataFrame
# NOTE(review): read from an absolute '/content/' path but written back below via a relative path;
# both refer to the same file only when the working directory is /content.
file_path = '/content/italian_vaccination_mergedPopGDP.csv'
df = pd.read_csv(file_path)
# Row-wise total of male and female doses, persisted back into the merged CSV.
df['dailytotal'] = df['males'] + df['females']
df.to_csv('italian_vaccination_mergedPopGDP.csv', index=False)
# Print the columns
print("Columns in the dataset:")
print(df.columns)
import pandas as pd
import plotly.express as px
# Horizontal bar chart of pop_resid for every region.
# pop_resid is repeated on every row of a region, so the first value per
# region is taken.
unique_pop_resid = df.groupby('region_name')['pop_resid'].first().reset_index()
# The label and title now call the column a population figure, matching the
# other figures in this file that label pop_resid as 'Population' (it was
# previously shown as "Population Residual").
# NOTE(review): "Resident" assumes pop_resid means resident population; confirm against the data source.
fig = px.bar(
    unique_pop_resid,
    x='pop_resid',
    y='region_name',
    labels={'pop_resid': 'Resident Population'},
    title='Resident Population for Each Region',
    orientation='h',
)
# Show the plot
fig.show()
import pandas as pd
import plotly.express as px
# Horizontal bar chart of the summed 'dailytotal' for every region.
dailytotal_per_region = df.groupby('region_name')['dailytotal'].sum()
region_dailytotal = dailytotal_per_region.reset_index()
fig = px.bar(
    region_dailytotal,
    x='dailytotal',
    y='region_name',
    labels={'dailytotal': 'Total Daily Vaccinations'},
    title='Total Daily Vaccinations for Each Region',
    orientation='h',
)
fig.show()
import pandas as pd
import plotly.express as px
# Compare, for every region, the summed 'dailytotal' with the region's pop_resid.
# Group by 'region_name': sum 'dailytotal' and take the first 'pop_resid' value.
region_stats = df.groupby('region_name').agg({'dailytotal': 'sum', 'pop_resid': 'first'}).reset_index()
# Melt the DataFrame to create a long-format for Plotly Express
melted_df = pd.melt(
    region_stats,
    id_vars='region_name',
    value_vars=['dailytotal', 'pop_resid'],
    var_name='Metric',
    value_name='Value',
)
# Create a grouped bar plot. barmode='group' is set explicitly: without it
# Plotly Express stacks the two metrics on top of each other, which adds doses
# to population instead of showing them side by side.
fig = px.bar(
    melted_df,
    x='Value',
    y='region_name',
    color='Metric',
    labels={'Value': 'Value', 'Metric': 'Metric'},
    title='Comparison of Population and Vaccination Doses for Each Region',
    orientation='h',
    barmode='group',
    width=800,
    height=400,
)
# Show the plot
fig.show()
import pandas as pd
import plotly.express as px
# Scatter plot of summed 'dailytotal' (x) against pop_resid (y), one point per region.
# Group by 'region_name': sum 'dailytotal' and take the first 'pop_resid' value.
region_stats = df.groupby('region_name').agg({'dailytotal': 'sum', 'pop_resid': 'first'}).reset_index()
# The axis label and title now call pop_resid a population figure, matching
# the other figures in this file (it was previously shown as "Population Residual").
# NOTE(review): "Resident" assumes pop_resid means resident population; confirm against the data source.
fig = px.scatter(
    region_stats,
    x='dailytotal',
    y='pop_resid',
    color='region_name',
    labels={'dailytotal': 'Daily Total', 'pop_resid': 'Resident Population'},
    title='Comparison of Daily Total with Resident Population for Each Region',
    width=800,
    height=400,
)
# Show the plot
fig.show()
import pandas as pd
import plotly.express as px
# Scatter plot of summed 'dailytotal' (x) against scaled GDP (y), one point per
# region, with the marker size driven by 'gdp_tot'.
# Group by 'region_name': first 'gdp_tot' value and summed 'dailytotal'.
region_stats = df.groupby('region_name').agg({'gdp_tot': 'first', 'dailytotal': 'sum'}).reset_index()
# Replace NaN values in 'gdp_tot' with a default size (e.g., 100), because the
# column is used as marker size. The result is assigned back instead of
# calling fillna(inplace=True) on the selected column, which is a chained
# in-place update and does not modify region_stats under pandas Copy-on-Write.
default_size = 100
region_stats['gdp_tot'] = region_stats['gdp_tot'].fillna(default_size)
# Scale 'gdp_tot' up by a factor of one million for the y-axis.
# NOTE(review): presumably gdp_tot is stored in millions of euros and this converts to euros; confirm the unit.
region_stats['gdp_tot_million'] = region_stats['gdp_tot'] * 1_000_000
# Create a scatter plot
fig = px.scatter(
    region_stats,
    x='dailytotal',
    y='gdp_tot_million',
    color='region_name',
    size='gdp_tot',  # Use 'gdp_tot' for the size of the markers
    labels={'dailytotal': 'Daily Total', 'gdp_tot_million': 'GDP Total ', 'region_name': 'Region'},
    title='Comparison of Daily Total with GDP Total for Each Region',
    width=800,
    height=400,
)
# Show the plot
fig.show()
import pandas as pd
import plotly.express as px
# Horizontal bar chart of the summed 'previous_infection' doses per region.
previous_infection_per_region = df.groupby('region_name')['previous_infection'].sum()
df_sum_previous_infection = previous_infection_per_region.reset_index()
fig = px.bar(
    df_sum_previous_infection,
    x='previous_infection',
    y='region_name',
    labels={'previous_infection': 'Count', 'region_name': 'Region'},
    title='Distribution of Previous Infection Doses by Region',
    orientation='h',
)
fig.show()
import pandas as pd
import plotly.express as px
# Vertical bar chart of the summed 'previous_infection' doses per region.
previous_infection_per_region = df.groupby('region_name')['previous_infection'].sum()
df_sum_previous_infection = previous_infection_per_region.reset_index()
fig = px.bar(
    df_sum_previous_infection,
    x='region_name',
    y='previous_infection',
    labels={'previous_infection': 'Count', 'region_name': 'Region'},
    title='Distribution of Previous Infection Doses by Region',
)
fig.show()
import pandas as pd
import plotly.express as px
# Scatter plot per region: summed 'previous_infection' (x) against GDP (y),
# with the marker size driven by 'pop_resid'.
# Scale 'gdp_tot' up by a factor of one million, in place on df.
# NOTE(review): presumably converts millions of euros to euros; confirm the unit.
# This mutates df, so running this cell twice scales the column twice.
df['gdp_tot'] *= 1000000
# Group by 'region_name': sum 'previous_infection', first 'gdp_tot' and 'pop_resid'.
region_stats = df.groupby('region_name').agg({'previous_infection': 'sum', 'gdp_tot': 'first', 'pop_resid': 'first'}).reset_index()
# Fill NaN values in 'pop_resid' with 0 because the column is used as marker
# size. The result is assigned back instead of calling fillna(inplace=True) on
# the selected column, which is a chained in-place update and does not modify
# region_stats under pandas Copy-on-Write.
region_stats['pop_resid'] = region_stats['pop_resid'].fillna(0)
# Create a scatter plot
fig = px.scatter(
    region_stats,
    x='previous_infection',
    y='gdp_tot',
    size='pop_resid',
    color='region_name',
    labels={'previous_infection': 'Sum of Previous Infection Count', 'gdp_tot': 'GDP Total', 'pop_resid': 'Population'},
    title='Comparison of Previous Infection with GDP and Population for Each Region',
    width=800,
    height=400,
)
# Show the plot
fig.show()
import pandas as pd
import plotly.express as px
# Bar chart per region of the summed 'dailytotal', coloured by age range.
dailytotal_by_region_age = df.groupby(['region_name', 'age_range'])['dailytotal'].sum()
region_age_distribution = dailytotal_by_region_age.reset_index()
fig = px.bar(
    region_age_distribution,
    x='region_name',
    y='dailytotal',
    color='age_range',
    labels={'dailytotal': 'Count', 'region_name': 'Region', 'age_range': 'Age Range'},
    title='Distribution of Age Range by Region',
    width=1000,
    height=700,
)
fig.show()
import pandas as pd
import plotly.express as px
# Bar chart of summed counts per dose column, with bars coloured by age range.
# Expects 'df' to provide 'age_range' plus every column named in 'dose_columns'.
dose_columns = [
    'first_dose',
    'second_dose',
    'previous_infection',
    'additional_booster_dose',
    'second_booster',
    'db3',
]

# Sum every dose column within each age range (one row per age range).
df_summed = df.groupby('age_range').agg(dict.fromkeys(dose_columns, 'sum')).reset_index()

# Reshape wide -> long: one row per (age range, dose column) pair, with the
# column name stored in 'Dose Type' and its summed value in 'Count'.
df_melted = df_summed.melt(
    id_vars=['age_range'],
    value_vars=dose_columns,
    var_name='Dose Type',
    value_name='Count',
)

fig = px.bar(
    data_frame=df_melted,
    x='Dose Type',
    y='Count',
    color='age_range',
    title='Distribution of Doses by Age Range',
    labels={'Dose Type': 'Dose Type', 'Count': 'Count', 'age_range': 'Age Range'},
    width=1000,
    height=600,
)
fig.show()
import pandas as pd
import plotly.express as px
# Bar chart of 'dailytotal' per supplier, with bars coloured by age range.
# Expects 'df' to provide the columns 'age_range', 'supplier' and 'dailytotal'.

# Sum 'dailytotal' for every (age range, supplier) pair.
age_supplier_groups = df.groupby(['age_range', 'supplier'])
df_summed = age_supplier_groups.agg({'dailytotal': 'sum'}).reset_index()

fig = px.bar(
    data_frame=df_summed,
    x='supplier',
    y='dailytotal',
    color='age_range',
    title='Distribution of Counts by Age Range and Supplier',
    labels={'supplier': 'Supplier', 'age_range': 'Age Range', 'dailytotal': 'Total Count'},
    width=1000,
    height=600,
)
fig.show()
import pandas as pd
import plotly.express as px
# Faceted bar chart: one panel per supplier, summed counts per dose column,
# bars coloured by age range.
# Expects 'df' to provide 'age_range', 'supplier' and every column named in
# 'dose_columns'.
dose_columns = [
    'first_dose',
    'second_dose',
    'previous_infection',
    'additional_booster_dose',
    'second_booster',
    'db3',
]
group_keys = ['age_range', 'supplier']

# Sum every dose column within each (age range, supplier) pair.
df_summed = df.groupby(group_keys).agg(dict.fromkeys(dose_columns, 'sum')).reset_index()

# Reshape wide -> long: the dose column name goes to 'Dose Type' and its
# summed value to 'Count'; the grouping keys are kept as identifiers.
df_melted = df_summed.melt(
    id_vars=group_keys,
    value_vars=dose_columns,
    var_name='Dose Type',
    value_name='Count',
)

fig = px.bar(
    data_frame=df_melted,
    x='Dose Type',
    y='Count',
    color='age_range',
    facet_col='supplier',
    title='Distribution of Doses by Age Range and Supplier',
    labels={'Dose Type': 'Dose Type', 'Count': 'Count', 'age_range': 'Age Range'},
    width=1200,
    height=800,
)
fig.show()
import pandas as pd
from statsmodels.tsa.stattools import adfuller
import matplotlib.pyplot as plt
# Stationarity check on the per-date vaccination totals: plot the raw series,
# run an Augmented Dickey-Fuller (ADF) test on it, take the first difference,
# then plot and test the differenced series as well.
vaccination_csv_path = '/content/italian_vaccination_mergedPopGDP.csv'
df = pd.read_csv(vaccination_csv_path)
df['administration_date'] = pd.to_datetime(df['administration_date'])

# One value per administration date: the sum of 'dailytotal' over that date's rows.
rows_by_date = df.groupby('administration_date')
dailytotal_series = rows_by_date['dailytotal'].sum()

# --- Original series -------------------------------------------------------
plt.figure(figsize=(12, 6))
plt.plot(dailytotal_series, label='Original Series')
plt.xlabel('Date')
plt.ylabel('Daily Total Vaccinations')
plt.title('Original Time Series')
plt.legend()
plt.show()

# adfuller returns a tuple; positions 0, 1 and 4 are read below as the test
# statistic, the p-value and the critical values.
result = adfuller(dailytotal_series, autolag='AIC')
original_stat, original_pvalue, original_critical = result[0], result[1], result[4]
print('ADF Statistic (original series):', original_stat)
print('p-value (original series):', original_pvalue)
print('Critical Values (original series):', original_critical)

# --- First-order differenced series ----------------------------------------
# diff() leaves a missing value in the first position; dropna() removes it.
dailytotal_diff = dailytotal_series.diff().dropna()

plt.figure(figsize=(12, 6))
plt.plot(dailytotal_diff, label='Differenced Series', color='orange')
plt.xlabel('Date')
plt.ylabel('Differenced Daily Total Vaccinations')
plt.title('Differenced Time Series')
plt.legend()
plt.show()

result_diff = adfuller(dailytotal_diff, autolag='AIC')
diff_stat, diff_pvalue, diff_critical = result_diff[0], result_diff[1], result_diff[4]
print('\nADF Statistic after differencing:', diff_stat)
print('p-value after differencing:', diff_pvalue)
print('Critical Values after differencing:', diff_critical)

# --- Verdicts --------------------------------------------------------------
# A p-value at or below 0.05 is reported as "likely stationary".
print(
    '\nThe original series is likely stationary.'
    if original_pvalue <= 0.05
    else '\nThe original series is likely non-stationary.'
)
print(
    'The differenced series is likely stationary.'
    if diff_pvalue <= 0.05
    else 'The differenced series is likely non-stationary.'
)
import pandas as pd
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
import matplotlib.pyplot as plt
# Reload the merged vaccination dataset from disk so this cell starts from
# the unmodified file contents.
vaccination_csv_path = '/content/italian_vaccination_mergedPopGDP.csv'
df = pd.read_csv(vaccination_csv_path)

# Parse the 'administration_date' column into datetime values.
parsed_administration_dates = pd.to_datetime(df['administration_date'])
df['administration_date'] = parsed_administration_dates

# One value per administration date: the sum of 'dailytotal' over that date's rows.
rows_by_date = df.groupby('administration_date')
dailytotal_series = rows_by_date['dailytotal'].sum()