-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path06_split-large-wavs.Rmd
125 lines (94 loc) · 4.43 KB
/
06_split-large-wavs.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
---
editor_options:
  chunk_output_type: console
---
# Splitting large .wav files
Here, we will first split the raw data, which was collected for 24 hours at a site, for 7 days at a stretch. This is being done for the sake of manual annotation of bird species. The deployment schedule of the AudioMoths was set to record for 4 minutes and then switch off for 1 minute. For the sake of analysis, the data was split into 10 s chunks and annotated manually using Raven Pro.
## Load required libraries
```{r}
library(warbleR)
library(seewave)
library(dplyr)
library(stringr)
library(tools)
# Source any custom/other internal functions necessary for analysis
source("code/01_internal-functions.R")
```
## Selecting dawn acoustic data
We will use warbleR::split_wavs() to split a large file. To do so, we will first load a list of .wav files from folders (this will have to be done site by site). Next, we select only files between 6 am and 10 am (this can be varied depending on the exercise or the question at hand). For each day selected, we randomly extract a continuous 16-min window of recording.
```{r}
# List the path that contains all folders, which contain the audiomoth data
path <- "C:\\data\\2020-winter\\"

# Listing the folders within which .WAV files are stored (one folder per site;
# the folder name is used as the SITE_ID prefix below)
folders <- dir(path, recursive = FALSE, full.names = TRUE)

# Recording start times to keep, compared as strings against the trailing
# digits of each file name; vary these depending on the question at hand
dawn_start <- "060000"
dawn_end <- "100000"

# Now get only those files that begin at 6am and end at 10am
files_by_site <- vector("list", length(folders))
for (i in seq_along(folders)) {
  site_id <- basename(folders)[i]
  site_dir <- paste0(path, site_id, "\\")

  # Rename the files within each folder with the prefix - SITE_ID.
  # Full paths are used so the working directory is never changed, and files
  # that already carry the prefix are skipped so that re-running this chunk
  # does not prefix them a second time.
  a <- list.files(site_dir, full.names = TRUE)
  needs_prefix <- !startsWith(basename(a), paste0(site_id, "_"))
  # Guard: paste0() returns a length-1 result for zero-length input, which
  # would make file.rename() fail on an empty (or fully renamed) folder.
  if (any(needs_prefix)) {
    file.rename(
      from = a[needs_prefix],
      to = paste0(site_dir, site_id, "_", basename(a[needs_prefix]))
    )
  }

  # Extract the time string (trailing digits of the file name without its
  # extension) and keep every file whose time lies inside the window.
  # Filtering the listing directly keeps each file exactly once, whatever the
  # number of dates recorded in the folder.
  site_files <- list.files(site_dir, full.names = TRUE)
  time_str <- site_files %>%
    tools::file_path_sans_ext() %>%
    str_extract("\\d+$")
  in_window <- !is.na(time_str) &
    time_str >= dawn_start &
    time_str <= dawn_end
  files_by_site[[i]] <- site_files[in_window]
}

# These are the list of files we need (an empty character vector if none)
files <- as.character(unlist(files_by_site, use.names = FALSE))
# Now we choose a random consecutive 16 min of data between 6am and 10am
# Get a list of unique dates (since we will be generating a random 16min for
# every date across every site)
site_date <- str_extract(basename(files), "\\w+_\\d+_")
unique(site_date) # Gives the unique site/date keys that need 16 min of data

# Number of consecutive recordings that make up one window
# (4 recordings of 4 min each = 16 min)
files_per_window <- 4

# Keys are matched by exact equality on the extracted prefix rather than as a
# regular expression against the full path, so one site's key can never pick
# up files of another site whose name merely contains it. Files from which no
# key could be extracted (NA) are ignored.
site_date_keys <- unique(site_date[!is.na(site_date)])

subset_files <- list()
for (key in site_date_keys) {
  a <- files[!is.na(site_date) & site_date == key]

  # Skip site/dates that do not have enough recordings for a full window
  if (length(a) < files_per_window) {
    next
  }

  # extractRandWindow() comes from code/01_internal-functions.R
  # NOTE(review): presumably returns `files_per_window` consecutive elements
  # of `a`, padded with NA when fewer are available - confirm in that file.
  subset_dat <- extractRandWindow(a, files_per_window)
  subset_dat <- na.exclude(subset_dat) # If there are less than 4 files
  subset_files <- c(subset_files, subset_dat)
}
final_subset <- unlist(subset_files)
# Subset those files and copy it to a separate folder
# Please note that these folders & files are locally stored (they are
# extremely large and cannot be added to GitHub)
subset_dir <- paste0("C:\\data\\", "subset")

# Only create the folder when it is missing, so re-running the chunk does not
# raise an "already exists" warning while genuine failures still surface
if (!dir.exists(subset_dir)) {
  dir.create(subset_dir)
}

# file.copy() reports failures only through its return value (and does not
# overwrite existing files by default), so check it explicitly
copied <- file.copy(from = final_subset, to = "C:\\data\\subset\\")
if (!all(copied)) {
  warning(
    sum(!copied), " of ", length(copied),
    " files were not copied (already present in the subset folder, or the ",
    "copy failed)",
    call. = FALSE
  )
}
```
## Split the files
Split the files and provide unique names to each file
```{r}
# Note: the path you choose to store data is up to the user.
subset_path <- "C:\\data\\subset\\"

# Split the files into n-second chunks
split_wavs(path = subset_path, sgmt.dur = 10, parallel = 4)

# Get files that need to be renamed. The renaming below expects each segment
# to be present in subset_path as "<original name>-<segment number>.wav".
split_files <- list.files(subset_path, full.names = TRUE, pattern = "-")

# Note the number of chunks will vary as a function of segment duration
# 240 seconds = 24 chunks each of 10s
chunks <- c("01-10", "10-20", "20-30",
            "30-40", "40-50", "50-60",
            "60-70", "70-80", "80-90",
            "90-100", "100-110", "110-120",
            "120-130", "130-140", "140-150",
            "150-160", "160-170", "170-180",
            "180-190", "190-200", "200-210",
            "210-220", "220-230", "230-240")

# Leave out files that already carry a chunk label: a label such as "_01-10"
# also ends in "-10.wav", so on a re-run it would otherwise be mistaken for
# segment number 10 and be relabelled.
labelled_suffixes <- paste0("_", chunks, ".wav")
already_labelled <- vapply(
  split_files,
  function(f) any(endsWith(f, labelled_suffixes)),
  logical(1),
  USE.NAMES = FALSE
)
split_files <- split_files[!already_labelled]

# Swap the trailing "-<segment number>.wav" for "_<start>-<end>.wav"
for (i in seq_along(chunks)) {
  segment_suffix <- paste0("-", i, ".wav")
  old_names <- split_files[endsWith(split_files, segment_suffix)]
  if (length(old_names) == 0) {
    next
  }

  # Cut the suffix off the end of the path instead of replacing the first
  # "-<i>" found anywhere in it, so hyphens elsewhere in a folder or site
  # name can never be altered.
  stems <- substr(old_names, 1, nchar(old_names) - nchar(segment_suffix))
  new_names <- paste0(stems, "_", chunks[i], ".wav")
  file.rename(from = old_names, to = new_names)
}
# Remove the original files (upper-case .WAV extension; the split segments
# handled above end in lower-case .wav)
orig_files <- list.files(subset_path, full.names = TRUE, pattern = "\\.WAV$")

# Only delete an original once at least one split segment derived from it is
# present, so a failed split does not leave the subset folder empty
segment_names <- list.files(subset_path, pattern = "\\.wav$")
has_segments <- vapply(
  tools::file_path_sans_ext(basename(orig_files)),
  function(stem) any(startsWith(segment_names, stem)),
  logical(1),
  USE.NAMES = FALSE
)
if (any(!has_segments)) {
  warning(
    sum(!has_segments),
    " original file(s) have no split segments and were kept",
    call. = FALSE
  )
}
file.remove(orig_files[has_segments])
```
Now, go ahead and begin the process of manual annotation!