-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathage_metadata_visualization.Rmd
108 lines (90 loc) · 4.68 KB
/
age_metadata_visualization.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
---
title: "age_metadata_visualization"
output: html_document
---
# number of samples with age data
```{r}
# Tally how many samples carry a host age and how many of those are also
# flagged SRM_present. Assumes `dataset21and22` already exists in the session
# with the columns `age_years` and `SRM_present`.

# TRUE for every row whose age_years is not missing.
has_age <- !is.na(dataset21and22$age_years)

# n samples with age data
sum(has_age)

# n samples with SRM and age data.
# na.rm = TRUE keeps the total defined when SRM_present is missing for a
# sample that has an age: TRUE & NA evaluates to NA, and a single NA would
# otherwise turn the whole sum into NA.
sum(has_age & dataset21and22$SRM_present == TRUE, na.rm = TRUE)
```
# histogram of age_years
```{r}
# Histograms of the "age_years" column (per the original note, exceptions
# from "host age" are excluded from that column).
# Assumes ggplot2 is attached and `dataset21and22` exists in the session.
#
# Columns are referenced by bare name inside aes() so every aesthetic is
# looked up in the data frame handed to that plot. Passing `df$col` vectors
# (or a separately subsetted vector) bypasses the layer data and only works
# while the lengths happen to line up.
# after_stat(count) replaces the deprecated `..count..` notation.

# All samples: 5-year-wide bins, with each bin's count printed in red above
# the bar.
ggplot(data = dataset21and22, aes(x = age_years)) +
  geom_histogram(binwidth = 5, fill = "grey", color = "black") +
  xlab("age in years") +
  ggtitle("Histogram of Host Age (Years)") +
  scale_x_continuous(breaks = seq(0, 110, 5)) +
  stat_bin(
    binwidth = 5, geom = "text", aes(label = after_stat(count)),
    vjust = -0.5, size = 3, color = "red"
  )

# Same histogram, restricted to the rows where SRM_present is TRUE.
# subset() also drops rows whose SRM_present is NA.
ggplot(
  data = subset(dataset21and22, SRM_present == TRUE),
  aes(x = age_years)
) +
  geom_histogram(binwidth = 5, fill = "grey", color = "black") +
  xlab("age in years") +
  ggtitle("Histogram of Host Age for Sample with SRM") +
  scale_x_continuous(breaks = seq(0, 110, 5)) +
  stat_bin(
    binwidth = 5, geom = "text", aes(label = after_stat(count)),
    vjust = -0.5, size = 3, color = "red"
  )

# Same as the first plot, but bars are filled by SRM_present so the two
# groups are stacked within each bin.
ggplot(data = dataset21and22, aes(x = age_years, fill = SRM_present)) +
  geom_histogram(binwidth = 5) +
  xlab("age in years") +
  ggtitle("Histogram of Host Age (Years)") +
  scale_x_continuous(breaks = seq(0, 110, 5)) +
  scale_fill_discrete(name = "SRM Present?")
```
# bar plot of binned age ranges
```{r}
# Bar plots of sample counts per pre-computed age bin.
# Assumes ggplot2 is attached and `dataset21and22` exists in the session with
# the columns `bin_5yrCutoff`, `bin_9yrSize` and `SRM_present`.
# after_stat(count) replaces the deprecated `..count..` notation.

# Display order for the bin_9yrSize labels; without explicit levels the bins
# would be ordered as plain text rather than by age.
# NOTE(review): these labels span 10-year intervals although the column name
# and plot titles say "9 year" -- confirm which is intended.
age_bin_levels <- c(
  "[0,10)", "[10,20)", "[20,30)", "[30,40)", "[40,50)", "[50,60)",
  "[60,70)", "[70,80)", "[80,90)", "[90,100)", "[100,110]"
)

# Rows with a missing bin are removed up front so no NA bar is drawn for them.
binned_5yr <- subset(dataset21and22, !is.na(bin_5yrCutoff))
binned_9yr <- subset(dataset21and22, !is.na(bin_9yrSize))

# ALL SAMPLES:
# bin: 5 year cutoff
ggplot(data = binned_5yr, aes(x = bin_5yrCutoff)) +
  geom_bar(fill = "grey", color = "black") +
  geom_text(
    stat = "count", aes(label = after_stat(count)),
    vjust = -0.5, color = "red"
  ) +
  xlab("Age Range (years)") +
  ggtitle("Sample Ages: 5 year cutoff for all samples")

# bin: 9 year range
ggplot(
  data = binned_9yr,
  aes(x = factor(bin_9yrSize, levels = age_bin_levels))
) +
  geom_bar(fill = "grey", color = "black") +
  geom_text(
    stat = "count", aes(label = after_stat(count)),
    vjust = -0.5, color = "red"
  ) +
  xlab("Age Range (years)") +
  ggtitle("Sample Ages: 9 year bins")

# Samples with SRM:
# (subset() also drops rows whose SRM_present is NA)
# bin: 5 year cutoff
ggplot(
  data = subset(binned_5yr, SRM_present == TRUE),
  aes(x = bin_5yrCutoff)
) +
  geom_bar(fill = "grey", color = "black") +
  geom_text(
    stat = "count", aes(label = after_stat(count)),
    vjust = -0.5, color = "red"
  ) +
  xlab("Age Range (years)") +
  ggtitle("Sample Ages: 5 year cutoff for SRM Present samples")

# bin: 9 year range
ggplot(
  data = subset(binned_9yr, SRM_present == TRUE),
  aes(x = factor(bin_9yrSize, levels = age_bin_levels))
) +
  geom_bar(fill = "grey", color = "black") +
  geom_text(
    stat = "count", aes(label = after_stat(count)),
    vjust = -0.5, color = "red"
  ) +
  xlab("Age Range (years)") +
  ggtitle("Sample Ages: 9 year bins for SRM Present samples")

# Compare Samples with and without SRM
# bin: 5 year cutoff
# The bars are stacked by SRM_present, so the count labels must be stacked
# too; with the default identity position each label sits at its own group's
# raw count instead of on its segment. position_stack(vjust = 0.5) centres
# every label inside the segment it describes.
ggplot(data = binned_5yr, aes(x = bin_5yrCutoff, fill = SRM_present)) +
  geom_bar() +
  geom_text(
    stat = "count", aes(label = after_stat(count)),
    position = position_stack(vjust = 0.5), color = "black"
  ) +
  xlab("Age Range (years)") +
  ggtitle("Sample Ages: 5 year cutoff for all samples")

# bin: 9 year range
ggplot(
  data = binned_9yr,
  aes(x = factor(bin_9yrSize, levels = age_bin_levels), fill = SRM_present)
) +
  geom_bar() +
  xlab("Age Range (years)") +
  ggtitle("Sample Ages: 9 year bins for all samples")
```
# bar plot of SRM present vs. absent groups by binned age, as a proportion
```{r}
# Share of SRM_present groups within each age bin.
# Assumes ggplot2 is attached and `dataset21and22` exists in the session with
# the columns `bin_5yrCutoff`, `bin_9yrSize` and `SRM_present`.
#
# position = "fill" rescales every bar to a total height of 1, so the y axis
# shows proportions; it is labelled explicitly because the default axis title
# would still read "count".

# Display order for the bin_9yrSize labels; without explicit levels the bins
# would be ordered as plain text rather than by age.
# NOTE(review): these labels span 10-year intervals although the column name
# and plot titles say "9 year" -- confirm which is intended.
proportion_bin_levels <- c(
  "[0,10)", "[10,20)", "[20,30)", "[30,40)", "[40,50)", "[50,60)",
  "[60,70)", "[70,80)", "[80,90)", "[90,100)", "[100,110]"
)

# bin: 5 year cutoff
ggplot(
  data = subset(dataset21and22, !is.na(bin_5yrCutoff)),
  aes(x = bin_5yrCutoff, fill = SRM_present)
) +
  geom_bar(position = "fill") +
  xlab("Age Bin (years)") +
  ylab("Proportion of samples") +
  ggtitle("Sample Ages: 5 year cutoff for all samples")

# bin: 9 year range
ggplot(
  data = subset(dataset21and22, !is.na(bin_9yrSize)),
  aes(
    x = factor(bin_9yrSize, levels = proportion_bin_levels),
    fill = SRM_present
  )
) +
  geom_bar(position = "fill") +
  xlab("Age Bin (years)") +
  ylab("Proportion of samples") +
  ggtitle("Sample Ages: 9 year bins for all samples")
```