-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathenum_by_year.R
32 lines (30 loc) · 2.89 KB
/
enum_by_year.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
DESCRIPTION <- "build a table of year by some enumeration in the data. Extremely useful for stream, stacked bar, and line charts."
load(data)
# motive by year (2009-2014) for Breaches
## TO USE: 0. Change the filters in the first line unless you want no Error actions, only Breaches, and no other filters
## 1. Change the starts_with()'s in the first select statement to the columns you want
## 2. Change the "4" in the separate line to the number of columns. (if you're doing actor.Internal.motive.m, it'd be 4 because there's 4 parts.)
## 3. Change the "4" in the select line below the separate line to whatever number you used in step 2.
## 4. Change the "4" in the rename line to whatever number you used in step 2.
## NOTE 1: This only works if all enumerations are the same level down the tree. (ie no actor.Internal.motive.m joined with actor.unknown)
## NOTE 2: If you remove the separate-select-rename lines you don't join the lower level values
temp <- vz %>%
filter(attribute.confidentiality.data_disclosure.Yes, action.Error != TRUE) %>% # Filter to the data we want to work on.
select(starts_with("actor.Internal.motive"),
starts_with("actor.External.motive"),
starts_with("actor.Partner.motive"),
timeline.incident.year) %>% # Select only the data column(s) we plan to count & the year we plan to count by
gather(enum,
value,
-timeline.incident.year) %>% # Gather column name - value pairs into 2 columns: enum - value
filter(value) %>% # Remove the the rows that are false (ie where the enumeration wasn't true and, tehrefore, which we don't want to count)
select(-value) %>% # Remove the value column as we no longer need it
separate(enum, 1:4, "[.]", extra="merge") %>% # separate the sections of the VERIS enumeration
select(timeline.incident.year, `4`) %>% # Remove the other columns generated by splitting VERIS enumerations
rename(enum = `4`) %>% # Rename the last column back to "enum" (which it was before)
group_by(timeline.incident.year, enum) %>% # group the data by year and enumeration (the cells in the ultimate table)
summarize(x = n()) %>% # Each cell's value is the count of the number of times the enum - year combination is true. n() counts the rows in the group.
mutate(n = sum(x)) %>% # Sum the x values to get an n. I don't know why, but this does it by the first group_by grouping (in this case 'year')
mutate(freq = round(100 * x/n, 2), `count/total` = paste(x, n, sep="/")) # Create frequency and count/total columns from x and n
temp %>% acast(enum~timeline.incident.year, value.var="count/total") # turn the enum, year, count/total columns into a table with columns of year, rows of enum, and values of count/total
temp %>% acast(enum~timeline.incident.year, value.var="freq", fill=0) # turn the enum, year, freq columns into a table with columns of year, rows of enum, and values of freq