initial commit of manuscript source code as maintained in git repo READ-PSB-MoveSeals
Eleanor Heywood committed Sep 30, 2024
1 parent 1c9e1d6 commit 7faabb5
Showing 29 changed files with 6,989 additions and 0 deletions.
199 changes: 199 additions & 0 deletions src/0000_process_wc_summary_files.R
@@ -0,0 +1,199 @@
# Process Wildlife Computers transmission summary files to extract useful metadata
# EIH
# updated: 2024-09-27
# Creates date cutoffs for the data based on the start and end of the data transmission records.
# inputs: Wildlife Computers summary files ('*-Summary.csv'), '*-All.csv' files, location files,
#         and Tag_Deployments_21719_summaries.csv
# Performs some cleaning and joining with metadata to produce cleaned summary and metadata records.
# WILDLIFE COMPUTERS COLUMN DEFINITIONS:
# https://static.wildlifecomputers.com/2019/05/10152339/Spreadsheet-File-Descriptions.pdf

# Briefly, the relevant columns for subsetting DEPLOYMENT records:
# EarliestXmitTime & LatestXmitTime: the times of the first and last received Argos transmissions (these should be the initial and final records in the All file).
######## These can be inaccurate if the tag was turned on for testing prior to deployment.
# To fix this, we subset the All.csv files to the Argos records occurring on or after the deployment date and within the deployment year, then take the min and max of those timestamps.

# EarliestDataTime: the timestamp of the first data point, including status messages. This should be close to the deployment date.
# This can also be error-prone, as with Rob's tags, where data are not cleared before deployment. To remedy this, we pull the first location from the locations file that occurs on or after the deployment day.
# LatestDataTime: the timestamp of the last data point, excluding status messages.
# This can be error-prone as well, e.g., due to redeployments in subsequent years. We replace these errors with the final location timestamp occurring in the year of the target animal's deployment.
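# Illustrative sketch (not part of the pipeline) of how the min/max correction behaves for
# a tag that was test-pinged before deployment; the dates below are hypothetical.
# msgs = as.POSIXct(c('2021-05-01 10:00', '2021-06-15 08:00', '2021-09-30 12:00'), tz = 'UTC')
# deploy = as.POSIXct('2021-06-15', tz = 'UTC')
# onDeploy = msgs[as.Date(msgs) >= as.Date(deploy) & format(msgs, '%Y') == format(deploy, '%Y')]
# range(onDeploy) # drops the May test ping; returns the corrected first/last transmit times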
rm(list = ls())
gc()
setwd("~/seal_telemetry/")
library(readr)
library(tidyr)
library(dplyr)
library(lubridate)


# load Summary files (N=63)
files = list.files(path = './data/L0/', pattern = '-Summary.csv', recursive = TRUE, full.names = TRUE, all.files = TRUE)

trx = lapply(X = files, FUN = function(x){
tmp = read.csv(x)
tmp$DeployID = as.character(tmp$DeployID)
tmp$EarliestDataTime = as.POSIXct(tmp$EarliestDataTime, format = "%H:%M:%S %d-%b-%Y", tz = 'UTC')
tmp$EarliestXmitTime = as.POSIXct(tmp$EarliestXmitTime, format = "%H:%M:%S %d-%b-%Y", tz = 'UTC')
tmp$LatestDataTime = as.POSIXct(tmp$LatestDataTime, format = "%H:%M:%S %d-%b-%Y", tz = 'UTC')
tmp$LatestXmitTime = as.POSIXct(tmp$LatestXmitTime, format = "%H:%M:%S %d-%b-%Y", tz = 'UTC')
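# e.g. "14:02:31 15-Jun-2021" parses to 2021-06-15 14:02:31 UTC under "%H:%M:%S %d-%b-%Y"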

# keep only columns with no missing values (drops columns not populated for this tag type)
tmp = tmp[,colSums(is.na(tmp)) == 0]
return(tmp)

})

# bind into dataframe & select relevant columns
Xmits = dplyr::bind_rows(trx) %>% dplyr::select(-c(SW, DeployDate, ReleaseDate, ReleaseType))

# read in the raw metadata
meta = read_csv("./data/meta/Tag_Deployments_21719_summaries.csv") %>% dplyr::select(PTT, deploydate)
meta$deploydate = as.POSIXct(meta$deploydate, format = '%m/%d/%Y', tz = 'UTC')

# join Xmits from Summary files with meta to get deploydate
Xmits = left_join(Xmits, meta, by = join_by(Ptt == PTT))


# LOAD ALL FILES (contain every attempted Argos transmission) - this is where the XmitTimes come from (the times of the first and last received Argos messages)
files = list.files(path = './data/L0/', pattern = '-All.csv', recursive = TRUE, full.names = TRUE, all.files = TRUE)
all = lapply(X = files, FUN = function(x){
tmp = read_csv(x, show_col_types = FALSE) %>% dplyr::select(`Platform ID No.`, `Msg Date`)
tmp$`Msg Date` = as.POSIXct(tmp$`Msg Date`, format = '%m/%d/%Y %H:%M:%S', tz = 'UTC')
return(tmp)

})

#
all = bind_rows(all) %>% left_join(., meta, by = join_by(`Platform ID No.` == PTT))

# Filter all messages so that they occur within the year of deployment and on or after the deployment calendar day
firstlast = all %>% filter(as.Date(`Msg Date`) >= as.Date(deploydate), year(deploydate) == year(`Msg Date`)) %>% group_by(`Platform ID No.`) %>% summarise(FirstAllTime = min(`Msg Date`), LastAllTime = max(`Msg Date`))


# Bind in the All First and Last transmit dates
Xmits = left_join(Xmits, firstlast, by = join_by(Ptt == `Platform ID No.`))

# Create new transmit start/end columns, preserving the originals where they are correct
Xmits$XmitStart = if_else(as.Date(Xmits$EarliestXmitTime) >= as.Date(Xmits$deploydate), Xmits$EarliestXmitTime, Xmits$FirstAllTime)
Xmits$XmitEnd = if_else(year(Xmits$LatestXmitTime) == year(Xmits$deploydate), Xmits$LatestXmitTime, Xmits$LastAllTime)
Xmits$totxmitdays = round(as.numeric(difftime(Xmits$XmitEnd, Xmits$XmitStart, units = 'days')), digits = 1)


# WC defines these as the first and last data points (the earliest includes status messages; the latest excludes them)
# We define them as the first and last timestamps of the location data wherever the EarliestDataTime is incorrect for our deployment

Xmits$DataStart = if_else(as.Date(Xmits$EarliestDataTime) >= as.Date(Xmits$deploydate), Xmits$EarliestDataTime, NA)
Xmits$DataEnd = if_else(year(Xmits$LatestDataTime) == year(Xmits$deploydate), Xmits$LatestDataTime, NA)


ptts = unique(Xmits$Ptt[is.na(Xmits$DataStart) | is.na(Xmits$DataEnd)])

files = list.files(path = './data/L0/', recursive = T, full.names = T, pattern = '-Locations.csv')

# Logical matrix (files x ptts): which files belong to the PTTs with missing data times
grps = sapply(X = ptts, FUN = function(x){grepl(pattern = x, x = files)})

# Keep the files that match exactly one affected PTT
indices = which(rowSums(grps) == 1)

files = files[indices]

filedf = data.frame(ptt = sapply(strsplit(files, '/'), '[[', 5), file = files)
filedf = filedf[order(filedf$ptt, filedf$file),]
# Prefer the '-1-Locations.csv' (FastGPS-processed) file where one exists
floc = filedf[grepl(pattern = '-1-Locations.csv', x = filedf$file),]
# Fall back to the Argos-only locations file for the remaining PTTs
argos = filedf[!filedf$ptt %in% floc$ptt, ]

filedfmain = bind_rows(floc, argos)

locs = lapply(X = filedfmain$file, FUN = function(x){
tmp = read_csv(x) %>% dplyr::select(Ptt, Date)
tmp$Date = as.POSIXct(tmp$Date, format = "%H:%M:%OS %d-%b-%Y", tz = 'UTC')
return(tmp)
})

locs = bind_rows(locs)
locs = left_join(locs, meta, by = join_by(Ptt == PTT))

# Derive the missing data start/end times as the first and last on-deployment locations
datacalcs = locs %>% filter(as.Date(Date) >= as.Date(deploydate), year(Date) == year(deploydate)) %>%
group_by(Ptt, deploydate) %>% summarize(DataStart = min(Date), DataEnd = max(Date)) %>% arrange(Ptt)

baddatastartptts = Xmits$Ptt[is.na(Xmits$DataStart)]
baddataendptts = Xmits$Ptt[is.na(Xmits$DataEnd)]


# Merge Xmits and datacalcs on the "Ptt" column (suffixes distinguish the two DataStart/DataEnd pairs)
Xmits_ <- merge(Xmits, datacalcs[,c('Ptt', 'DataStart', 'DataEnd')], by = "Ptt", suffixes = c("_df1", "_df2"), all.x = TRUE)

# Where the summary-file values were flagged NA, backfill from the location-derived values
Xmits_$DataStart_df1[is.na(Xmits_$DataStart_df1)] <- Xmits_$DataStart_df2[is.na(Xmits_$DataStart_df1)]
Xmits_$DataEnd_df1[is.na(Xmits_$DataEnd_df1)] <- Xmits_$DataEnd_df2[is.na(Xmits_$DataEnd_df1)]

Xmits_ = Xmits_ %>% rename(DataStart = DataStart_df1, DataEnd = DataEnd_df1) %>%
dplyr::select(Ptt, PercentDecoded, Passes, PercentArgosLoc, MsgPerPass, DS, MinInterval,deploydate, XmitStart, XmitEnd, totxmitdays, DataStart, DataEnd)
Xmits_$totdatadays = round(as.numeric(difftime(Xmits_$DataEnd, Xmits_$DataStart, units = 'days')), digits = 1)

kmmeta = read_csv("./data/meta/deployment summary_20192023.csv") %>%
dplyr::select(PTT, tagmodel, sex, deployloc, masskg, lengthcm, girthcm, notes)
kmmeta$tagmodel[kmmeta$PTT == 176859] = 'SPLASH10'

# FIX TAG MODEL
unique(kmmeta$tagmodel)


# THESE SHOULD BE FASTLOC ENABLED (THEY ALL HAD FASTLOC FILES, N=23)
# 224161 224162 224163 224164 224165 224166 224167 224168 235616 235617 235618 235619
# 240184 240185 142351 225840 225841 225842 237647 237648 237649 237651 237652

# 240186 did not transmit fastloc data but is fastloc capable


deffastloc = c(224161, 224162, 224163, 224164, 224165, 224166, 224167, 224168, 235616, 235617, 235618, 235619,
240184, 240185, 142351, 225840, 225841, 225842, 237647, 237648, 237649, 237651, 237652, 240186)

kmmeta$Fastloc = FALSE
kmmeta$Fastloc[kmmeta$PTT %in% deffastloc] = TRUE

# ALL DIFFERENT SPLASH TAG MODEL SPELLINGS AS WRITTEN IN THE META:
# "SPLASH10-297", "SPLASH10", "SPLASH tag/GPS", "SPLASH10-296F", "SPLASH297",
# "SPLASH10F-297A", "SPLASH10-351F", "SPLASH Fastloc", "SPLASH10F-393A", "SPLASH10F-296A", "SPLASH10F"
kmmeta$`Tag Model New` = NA
kmmeta$`Tag Model New`[kmmeta$tagmodel %in% c('SPOT6', 'SPOT tag', 'SPOT293', 'SPOT293A')] = 'SPOT-293'


##### NO FASTLOC
kmmeta$`Tag Model New`[!kmmeta$Fastloc & kmmeta$tagmodel %in% c('SPLASH10-297', 'SPLASH10', 'SPLASH10297', 'SPLASH297')] = 'SPLASH10-297'

# SPLASH10-296F - 206722, 206723, 206724
kmmeta$`Tag Model New`[!kmmeta$Fastloc & kmmeta$tagmodel == 'SPLASH10-296F'] = 'SPLASH10-296'

# SPLASH10-351 config - does not have fastloc
kmmeta$`Tag Model New`[!kmmeta$Fastloc & kmmeta$tagmodel %in% c("SPLASH10-351F")] = 'SPLASH10-351'

##### FASTLOC
# FASTLOC-297 config - assume that generic SPLASH10F or GPS tags are SPLASH10-F-297 configs, as this was the most purchased configuration
kmmeta$`Tag Model New`[kmmeta$Fastloc & kmmeta$tagmodel %in% c('SPLASH10F-297A', 'SPLASH10F', 'SPLASH tag/GPS', 'SPLASH Fastloc')] = 'SPLASH10-F-297'

# FASTLOC-296 config
kmmeta$`Tag Model New`[kmmeta$Fastloc & kmmeta$tagmodel == 'SPLASH10F-296A'] = 'SPLASH10-F-296'

# SPLASH10F-393A
kmmeta$`Tag Model New`[kmmeta$Fastloc & kmmeta$tagmodel == 'SPLASH10F-393A'] = 'SPLASH10-F-393'


Xmits2 = full_join(Xmits_, kmmeta, by = join_by(Ptt == PTT))

colnames(Xmits2) = tolower(colnames(Xmits2))

# DEFINE DATACUTOFF PERIODS
Xmits2$cutoffstart = Xmits2$datastart
Xmits2$cutoffend = Xmits2$dataend

# RESOLVE PTT 195526 - the animal that died in the gillnet. Its death was estimated at around 6/16 based on the dive records. The tag was brought up on a boat and continued pinging back at the dock and at the fisher's house, which is how it was found and recovered.
# Transmissions ran to 7/27, so a full month of location data was collected while the tag was not on the animal.
Xmits2$cutoffend[Xmits2$ptt == 195526] = as.POSIXct(x = '2020-06-16 00:00:00', tz = 'UTC')

# Calculate dry period as the difference between deployment and first transmit
Xmits2$dryprd_days = round(as.numeric(difftime(Xmits2$xmitstart, Xmits2$deploydate, units = 'days')), digits = 2)
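# (a value near 0 means transmissions began on the deployment day; larger values are the
# number of days that elapsed before the first post-deployment transmission)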

# write out cleaned summary and metadata file
write_csv(x = Xmits2, file = './data/meta/Hg2019-2023_WC_Tag_summaryFiles+MetaData.csv')

190 changes: 190 additions & 0 deletions src/00_argos_fastloc_QAQC.R
@@ -0,0 +1,190 @@
# EIH
# updated 2024-09-27
# Location and FastGPS pre-processing
# inputs: Wildlife Computers portal downloads (L0): Argos and Fastloc data files
#       : the output of 0000_process_wc_summary_files.R, for the data cutoff dates

# setup
rm(list = ls())
gc()
library(readr)
library(tidyr)
library(dplyr)

options(digits.secs = 6) # set the option; getOption() would only read it
setwd('~/seal_telemetry')

# source some helper functions
source("./READ-PSB-MoveSeals/src/fxns_wildlifecomputers_readdata.R")

# Will subset all data >= Deployment Date and <= Final Transmission

# Load Meta Data
meta = read_csv(file = "./data/meta/Hg2019-2023_WC_Tag_summaryFiles+MetaData.csv")


# Get rid of bad ptts: 177509 (no locations), 240183 (rehab), 240186 (tag malfunction)
keeps = setdiff(unique(meta$ptt), c(177509, 240186, 240183))
meta = meta[meta$ptt %in% keeps, ]


# List FastGPS files
files = list.files("./data/L0/", pattern = '*-1-FastGPS.csv', full.names = TRUE, recursive = TRUE, ignore.case = TRUE)
filedf = data.frame(fname = sapply(strsplit(files, '/'), '[[', 6), ptt = sapply(strsplit(files, '/'), '[[', 5))
# Filter by keeps
filedf = filedf[filedf$ptt %in% as.character(keeps), ]

fastlocs = WC_read_fastloc(files = filedf$fname, datadir = './data/L0/')
fastlocs$ptt = as.numeric(fastlocs$ptt)

# List all location data files
files = list.files("./data/L0/", pattern = '*-Locations.csv', recursive = TRUE, full.names = TRUE)

ptts = as.numeric(sapply(strsplit(x = files, split = '/'), "[[", 5))
length(unique(ptts))
fnames = sapply(strsplit(x = files, split = '/'), "[[", 6)
filedf = data.frame(ptt = ptts, fnames = fnames, fullnames = files)
filedf = filedf[as.numeric(filedf$ptt) %in% keeps, ]

# If there are both a plain -Locations.csv file and a [single digit]-Locations.csv file, keep the -1-Locations.csv file
fastlocFiles = filedf[grepl(pattern = '-\\d-Locations.csv', x = filedf$fnames), ]
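# e.g. '240184-1-Locations.csv' matches the '-\\d-Locations.csv' pattern; '240184-Locations.csv' does not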
argosonly = filedf[!filedf$ptt %in% fastlocFiles$ptt, ] # logical negation is safe even if no FastGPS files are found

fs = rbind(fastlocFiles, argosonly)
fs = fs[fs$fnames != '142351-Locations.csv', ]

# LOAD 61 TAGS - 2 failed to transmit
locs = WC_read_locs(files = fs$fullnames) # 142115
# Checks
length(unique(locs$ptt))
which(!fs$ptt %in% unique(locs$ptt))

############################################################################################
# MERGE BY TIME AND PTT TO GET FASTLOC ANCILLARY DATA
# Time match to merge in sat and GPS error data
storageL = list()
fastloc_ptts = fastlocFiles$ptt
for (i in 1:length(fastloc_ptts)){
id = fastloc_ptts[i]
tm1 = locs[locs$ptt == id, ]
tm1 = tm1[order(tm1$datetime),]
tm2 = fastlocs[fastlocs$ptt == id, ]
tm2 = tm2[order(tm2$datetime), ]
tm2$RowIndex = 1:nrow(tm2)
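# Nearest-in-time matching: the bin edges below are the midpoints between consecutive
# FastGPS timestamps (prefixed with -Inf), so findInterval() returns the index of the
# closest FastGPS record for each location row.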
tm1$timematch_idx = findInterval(x = tm1$datetime, vec = c(-Inf, head(tm2$datetime, -1)) + c(0, diff(tm2$datetime)/2))
tm1$timematch_idx[tm1$type == 'Argos'] = NA # only GPS-type rows should inherit FastGPS ancillary data

colnames(tm2)[9] = 'fastloc_datetime' # rename the FastGPS datetime column to avoid a name clash on join
tm2 = tm2[,c('ptt', 'fastloc_datetime','hauled.out', 'satellites', 'residual', 'time.error', 'RowIndex')]

merger = left_join(tm1, tm2, by = join_by(ptt, timematch_idx == RowIndex))

storageL[[i]] = merger

}

merged_fltags = do.call(rbind, storageL)

argosdata = locs[!locs$ptt %in% fastloc_ptts, ]

# Recombine the merged fastloc data and the argos only tags
locs_ = bind_rows(argosdata, merged_fltags)

##### ELIMINATE PRE-DEPLOYMENT AND POST-DEPLOYMENT DATA

# merge with meta to get the transmission cutoff dates

locs2 = left_join(locs_, meta, by = 'ptt') %>%
  filter(datetime >= cutoffstart, datetime <= cutoffend) %>%
  rename(lat = latitude, lon = longitude, smaj = error.semi.major.axis, smin = error.semi.minor.axis,
         lc = quality, eor = error.ellipse.orientation, id = ptt)


# 1379 locations eliminated: pre-deployment records and post-deployment transmissions from tags no longer on an animal
nrow(locs) - nrow(locs2)

# write out data
write_csv(x = locs2, file = './data/L1/locs/Hg_2019-2023_CombinedDeploymentLocs.csv')

# clean workspace
rm(list = setdiff(ls(), 'locs2'))

# Eliminate FastGPS positions that have both few satellites (< 6) and high residuals (> 30) (Dujon et al. 2014)
badgps = which(locs2$residual > 30 & locs2$satellites < 6)

if(length(badgps) > 0 ){
locs3 = locs2[-c(badgps), ]
}else{locs3 = locs2}


# Remove positions that are impossible regardless of LC class (rough NW Atlantic study-area bounding box)
locs4 = locs3[which(locs3$lon > -77 & locs3$lon <= -54 & locs3$lat >= 35.0),]

# Calculate percentage of location classes
table(locs4$lc, useNA = 'ifany')
round(table(locs4$lc, useNA = 'ifany') / nrow(locs4) * 100)

# Remove Zs
locs4 = locs4[locs4$lc != 'Z', ]
table(locs4$lc, useNA = 'ifany')
round(table(locs4$lc, useNA = 'ifany') / nrow(locs4) * 100)

# Get rid of NA
locs4 = locs4[!is.na(locs4$lc), ]

# remove duplicate id/datetime records (the first row per pair is kept after sorting)
locs4 = locs4 %>% arrange(id, datetime, desc(error.radius))

dupes = which(duplicated(locs4[,c('id', 'datetime')]))

# guard: locs4[-integer(0), ] would drop every row if no duplicates were found
if (length(dupes) > 0) {
  locs5 = locs4[-dupes, ]
} else {
  locs5 = locs4
}
table(locs5$lc, useNA = 'ifany')
length(unique(locs5$id))

# Remove those with comments indicating erroneous location
locs6 = locs5[is.na(locs5$comment), ]

table(locs6$type)

# GET NUMBERS FOR PRE-WINDFARM-CONSTRUCTION PERIOD
nrow(locs6[locs6$datetime < as.POSIXct(x = '2023-06-01 00:00:00', tz = 'UTC'),])
table(locs6$type[locs6$datetime < as.POSIXct(x = '2023-06-01 00:00:00', tz = 'UTC')])
length(unique(locs6$id))
# write out filtered data
write.csv(x = locs6, file = './data/L1/locs/Hg_2019-2023_Prefiltered_Locs.csv', row.names = F)

# SCRATCH
# A PRIORI SPEED FILTER

# APRIORI SPEED FILTER FROM ARGOS FILTER
# data = locs3
#
# ll = split(data, data$id)
#
# resL = lapply(X = ll, FUN = function(x, vmax = 10){
#
# Gs = x[x$lc == 'G', ]
# tmp = x[x$lc != 'G',]
# tmp = tmp[order(tmp$datetime), ]
# filt = argosfilter::sdafilter(lat = tmp$lat,
# lon = tmp$lon,
# dtime = tmp$datetime,
# lc = tmp$lc,
# vmax = vmax, ang = -1)
#
# dat = tmp[which(filt %in% c('not', 'end_location')), ]
# print('n removed:')
# print(nrow(tmp) - nrow(dat))
# fin = rbind(dat, Gs)
# fin = fin[order(fin$datetime), ]
#
# return(fin)
# rm(tmp, dat, fin)
#
# })

#locs_filt = bind_rows(resL)
#rm(ll, data)
