initial commit of manuscript source code as maintained in git repo READ-PSB-MoveSeals
Eleanor Heywood committed Sep 30, 2024
1 parent 1c9e1d6 commit 7faabb5
Showing 29 changed files with 6,989 additions and 0 deletions.
199 changes: 199 additions & 0 deletions src/0000_process_wc_summary_files.R
@@ -0,0 +1,199 @@
# Process Wildlife Computers transmission summary files to extract useful metadata
# EIH
# updated: 2024-09-27
# Creates date cutoffs for the data based on the start and end of the data transmission records.
# inputs: Wildlife Computers summary files ('*-Summary.csv'), '*-All.csv' files, location files,
#         and Tag_Deployments_21719_summaries.csv
# Performs some cleaning and joining with metadata to produce cleaned summary and metadata records.
# WILDLIFE COMPUTERS COLUMN DEFINITIONS:
# https://static.wildlifecomputers.com/2019/05/10152339/Spreadsheet-File-Descriptions.pdf

# Briefly, the relevant columns for subsetting DEPLOYMENT records:
# EarliestXmitTime & LatestXmitTime: the times of the first and last received Argos transmissions (these should be the initial and final records in the All file).
######## These can be inaccurate if the tag was turned on for testing prior to deployment.
# To fix this, we subset the All.csv files to the Argos records occurring on or after the deployment date and within the deployment year, then take the min and max of those timestamps.

# EarliestDataTime: the timestamp of the first data point, including status messages. This should be close to the deployment date.
# This can also be error-prone, as with Rob's tags, where data are not cleared before deployment. To remedy this, we pull the first location from the locations file that occurs on or after the deployment day.
# LatestDataTime: the timestamp of the last data point, excluding status messages.
# This can be error-prone as well, e.g., due to redeployments in subsequent years. We replace these errors with the final location timestamp occurring in the year of the target animal's deployment.
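# Illustrative sketch (not part of the pipeline) of how the min/max correction behaves for
# a tag that was test-pinged before deployment; the dates below are hypothetical.
# msgs = as.POSIXct(c('2021-05-01 10:00', '2021-06-15 08:00', '2021-09-30 12:00'), tz = 'UTC')
# deploy = as.POSIXct('2021-06-15', tz = 'UTC')
# onDeploy = msgs[as.Date(msgs) >= as.Date(deploy) & format(msgs, '%Y') == format(deploy, '%Y')]
# range(onDeploy) # drops the May test ping; returns the corrected first/last transmit times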
rm(list = ls())
gc()
setwd("~/seal_telemetry/")
library(readr)
library(tidyr)
library(dplyr)
library(lubridate)


# load Summary files (N=63)
files = list.files(path = './data/L0/', pattern = '-Summary.csv', recursive = TRUE, full.names = TRUE, all.files = TRUE)

trx = lapply(X = files, FUN = function(x){
tmp = read.csv(x)
tmp$DeployID = as.character(tmp$DeployID)
tmp$EarliestDataTime = as.POSIXct(tmp$EarliestDataTime, format = "%H:%M:%S %d-%b-%Y", tz = 'UTC')
tmp$EarliestXmitTime = as.POSIXct(tmp$EarliestXmitTime, format = "%H:%M:%S %d-%b-%Y", tz = 'UTC')
tmp$LatestDataTime = as.POSIXct(tmp$LatestDataTime, format = "%H:%M:%S %d-%b-%Y", tz = 'UTC')
tmp$LatestXmitTime = as.POSIXct(tmp$LatestXmitTime, format = "%H:%M:%S %d-%b-%Y", tz = 'UTC')
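# e.g. "14:02:31 15-Jun-2021" parses to 2021-06-15 14:02:31 UTC under "%H:%M:%S %d-%b-%Y"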

# keep only columns with no missing values (drops columns not populated for this tag type)
tmp = tmp[,colSums(is.na(tmp)) == 0]
return(tmp)

})

# bind into dataframe & select relevant columns
Xmits = dplyr::bind_rows(trx) %>% dplyr::select(-c(SW, DeployDate, ReleaseDate, ReleaseType))

# read in the raw metadata
meta = read_csv("./data/meta/Tag_Deployments_21719_summaries.csv") %>% dplyr::select(PTT, deploydate)
meta$deploydate = as.POSIXct(meta$deploydate, format = '%m/%d/%Y', tz = 'UTC')

# join Xmits from Summary files with meta to get deploydate
Xmits = left_join(Xmits, meta, by = join_by(Ptt == PTT))


# LOAD ALL FILES (contain every attempted Argos transmission) - this is where the XmitTimes come from (the times of the first and last received Argos messages)
files = list.files(path = './data/L0/', pattern = '-All.csv', recursive = TRUE, full.names = TRUE, all.files = TRUE)
all = lapply(X = files, FUN = function(x){
tmp = read_csv(x, show_col_types = FALSE) %>% dplyr::select(`Platform ID No.`, `Msg Date`)
tmp$`Msg Date` = as.POSIXct(tmp$`Msg Date`, format = '%m/%d/%Y %H:%M:%S', tz = 'UTC')
return(tmp)

})

#
all = bind_rows(all) %>% left_join(., meta, by = join_by(`Platform ID No.` == PTT))

# Filter all messages so that they occur within the year of deployment and on or after the deployment calendar day
firstlast = all %>% filter(as.Date(`Msg Date`) >= as.Date(deploydate), year(deploydate) == year(`Msg Date`)) %>% group_by(`Platform ID No.`) %>% summarise(FirstAllTime = min(`Msg Date`), LastAllTime = max(`Msg Date`))


# Bind in the All First and Last transmit dates
Xmits = left_join(Xmits, firstlast, by = join_by(Ptt == `Platform ID No.`))

# Create new transmit start/end columns, preserving the originals where they are correct
Xmits$XmitStart = if_else(as.Date(Xmits$EarliestXmitTime) >= as.Date(Xmits$deploydate), Xmits$EarliestXmitTime, Xmits$FirstAllTime)
Xmits$XmitEnd = if_else(year(Xmits$LatestXmitTime) == year(Xmits$deploydate), Xmits$LatestXmitTime, Xmits$LastAllTime)
Xmits$totxmitdays = round(as.numeric(difftime(Xmits$XmitEnd, Xmits$XmitStart, units = 'days')), digits = 1)


# WC defines these as the first and last data points (the earliest includes status messages; the latest excludes them)
# We define them as the first and last timestamps of the location data wherever the EarliestDataTime is incorrect for our deployment

Xmits$DataStart = if_else(as.Date(Xmits$EarliestDataTime) >= as.Date(Xmits$deploydate), Xmits$EarliestDataTime, NA)
Xmits$DataEnd = if_else(year(Xmits$LatestDataTime) == year(Xmits$deploydate), Xmits$LatestDataTime, NA)


ptts = unique(Xmits$Ptt[is.na(Xmits$DataStart) | is.na(Xmits$DataEnd)])

files = list.files(path = './data/L0/', recursive = T, full.names = T, pattern = '-Locations.csv')

# Logical matrix (files x ptts): which files belong to the PTTs with missing data times
grps = sapply(X = ptts, FUN = function(x){grepl(pattern = x, x = files)})

# Keep the files that match exactly one affected PTT
indices = which(rowSums(grps) == 1)

files = files[indices]

filedf = data.frame(ptt = sapply(strsplit(files, '/'), '[[', 5), file = files)
filedf = filedf[order(filedf$ptt, filedf$file),]
# Prefer the '-1-Locations.csv' (FastGPS-processed) file where one exists
floc = filedf[grepl(pattern = '-1-Locations.csv', x = filedf$file),]
# Fall back to the Argos-only locations file for the remaining PTTs
argos = filedf[!filedf$ptt %in% floc$ptt, ]

filedfmain = bind_rows(floc, argos)

locs = lapply(X = filedfmain$file, FUN = function(x){
tmp = read_csv(x) %>% dplyr::select(Ptt, Date)
tmp$Date = as.POSIXct(tmp$Date, format = "%H:%M:%OS %d-%b-%Y", tz = 'UTC')
return(tmp)
})

locs = bind_rows(locs)
locs = left_join(locs, meta, by = join_by(Ptt == PTT))

# Derive the missing data start/end times as the first and last on-deployment locations
datacalcs = locs %>% filter(as.Date(Date) >= as.Date(deploydate), year(Date) == year(deploydate)) %>%
group_by(Ptt, deploydate) %>% summarize(DataStart = min(Date), DataEnd = max(Date)) %>% arrange(Ptt)

baddatastartptts = Xmits$Ptt[is.na(Xmits$DataStart)]
baddataendptts = Xmits$Ptt[is.na(Xmits$DataEnd)]


# Merge Xmits and datacalcs on the "Ptt" column (suffixes distinguish the two DataStart/DataEnd pairs)
Xmits_ <- merge(Xmits, datacalcs[,c('Ptt', 'DataStart', 'DataEnd')], by = "Ptt", suffixes = c("_df1", "_df2"), all.x = TRUE)

# Where the summary-file values were flagged NA, backfill from the location-derived values
Xmits_$DataStart_df1[is.na(Xmits_$DataStart_df1)] <- Xmits_$DataStart_df2[is.na(Xmits_$DataStart_df1)]
Xmits_$DataEnd_df1[is.na(Xmits_$DataEnd_df1)] <- Xmits_$DataEnd_df2[is.na(Xmits_$DataEnd_df1)]

Xmits_ = Xmits_ %>% rename(DataStart = DataStart_df1, DataEnd = DataEnd_df1) %>%
dplyr::select(Ptt, PercentDecoded, Passes, PercentArgosLoc, MsgPerPass, DS, MinInterval,deploydate, XmitStart, XmitEnd, totxmitdays, DataStart, DataEnd)
Xmits_$totdatadays = round(as.numeric(difftime(Xmits_$DataEnd, Xmits_$DataStart, units = 'days')), digits = 1)

kmmeta = read_csv("./data/meta/deployment summary_20192023.csv") %>%
dplyr::select(PTT, tagmodel, sex, deployloc, masskg, lengthcm, girthcm, notes)
kmmeta$tagmodel[kmmeta$PTT == 176859] = 'SPLASH10'

# FIX TAG MODEL
unique(kmmeta$tagmodel)


# THESE SHOULD BE FASTLOC ENABLED (THEY ALL HAD FASTLOC FILES, N=23)
# 224161 224162 224163 224164 224165 224166 224167 224168 235616 235617 235618 235619
# 240184 240185 142351 225840 225841 225842 237647 237648 237649 237651 237652

# 240186 did not transmit fastloc data but is fastloc capable


deffastloc = c(224161, 224162, 224163, 224164, 224165, 224166, 224167, 224168, 235616, 235617, 235618, 235619,
240184, 240185, 142351, 225840, 225841, 225842, 237647, 237648, 237649, 237651, 237652, 240186)

kmmeta$Fastloc = FALSE
kmmeta$Fastloc[kmmeta$PTT %in% deffastloc] = TRUE

# ALL DIFFERENT SPLASH TAG MODEL SPELLINGS AS WRITTEN IN THE META:
# "SPLASH10-297", "SPLASH10", "SPLASH tag/GPS", "SPLASH10-296F", "SPLASH297",
# "SPLASH10F-297A", "SPLASH10-351F", "SPLASH Fastloc", "SPLASH10F-393A", "SPLASH10F-296A", "SPLASH10F"
kmmeta$`Tag Model New` = NA
kmmeta$`Tag Model New`[kmmeta$tagmodel %in% c('SPOT6', 'SPOT tag', 'SPOT293', 'SPOT293A')] = 'SPOT-293'


##### NO FASTLOC
kmmeta$`Tag Model New`[!kmmeta$Fastloc & kmmeta$tagmodel %in% c('SPLASH10-297', 'SPLASH10', 'SPLASH10297', 'SPLASH297')] = 'SPLASH10-297'

# SPLASH10-296F - 206722, 206723, 206724
kmmeta$`Tag Model New`[!kmmeta$Fastloc & kmmeta$tagmodel == 'SPLASH10-296F'] = 'SPLASH10-296'

# SPLASH10-351 config - does not have fastloc
kmmeta$`Tag Model New`[!kmmeta$Fastloc & kmmeta$tagmodel %in% c("SPLASH10-351F")] = 'SPLASH10-351'

##### FASTLOC
# FASTLOC-297 config - assume that generic SPLASH10F or GPS tags are SPLASH10-F-297 configs, as this was the most purchased configuration
kmmeta$`Tag Model New`[kmmeta$Fastloc & kmmeta$tagmodel %in% c('SPLASH10F-297A', 'SPLASH10F', 'SPLASH tag/GPS', 'SPLASH Fastloc')] = 'SPLASH10-F-297'

# FASTLOC-296 config
kmmeta$`Tag Model New`[kmmeta$Fastloc & kmmeta$tagmodel == 'SPLASH10F-296A'] = 'SPLASH10-F-296'

# SPLASH10F-393A
kmmeta$`Tag Model New`[kmmeta$Fastloc & kmmeta$tagmodel == 'SPLASH10F-393A'] = 'SPLASH10-F-393'


Xmits2 = full_join(Xmits_, kmmeta, by = join_by(Ptt == PTT))

colnames(Xmits2) = tolower(colnames(Xmits2))

# DEFINE DATACUTOFF PERIODS
Xmits2$cutoffstart = Xmits2$datastart
Xmits2$cutoffend = Xmits2$dataend

# RESOLVE PTT 195526 - the animal that died in the gillnet. Its death was estimated at around 6/16 based on the dive records. The tag was brought up on a boat and continued pinging back at the dock and at the fisher's house, which is how it was found and recovered.
# Transmissions ran to 7/27, so a full month of location data was collected while the tag was not on the animal.
Xmits2$cutoffend[Xmits2$ptt == 195526] = as.POSIXct(x = '2020-06-16 00:00:00', tz = 'UTC')

# Calculate dry period as the difference between deployment and first transmit
Xmits2$dryprd_days = round(as.numeric(difftime(Xmits2$xmitstart, Xmits2$deploydate, units = 'days')), digits = 2)
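# (a value near 0 means transmissions began on the deployment day; larger values are the
# number of days that elapsed before the first post-deployment transmission)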

# write out cleaned summary and metadata file
write_csv(x = Xmits2, file = './data/meta/Hg2019-2023_WC_Tag_summaryFiles+MetaData.csv')

190 changes: 190 additions & 0 deletions src/00_argos_fastloc_QAQC.R
@@ -0,0 +1,190 @@
# EIH
# updated 2024-09-27
# Location and FastGPS pre-processing
# inputs: Wildlife Computers portal downloads (L0): Argos and Fastloc data files
#       : the output of 0000_process_wc_summary_files.R, for the data cutoff dates

# setup
rm(list = ls())
gc()
library(readr)
library(tidyr)
library(dplyr)

options(digits.secs = 6) # set the option; getOption() would only read it
setwd('~/seal_telemetry')

# source some helper functions
source("./READ-PSB-MoveSeals/src/fxns_wildlifecomputers_readdata.R")

# Will subset all data >= Deployment Date and <= Final Transmission

# Load Meta Data
meta = read_csv(file = "./data/meta/Hg2019-2023_WC_Tag_summaryFiles+MetaData.csv")


# Get rid of bad ptts: 177509 (no locations), 240183 (rehab), 240186 (tag malfunction)
keeps = setdiff(unique(meta$ptt), c(177509, 240186, 240183))
meta = meta[meta$ptt %in% keeps, ]


# List FastGPS files
files = list.files("./data/L0/", pattern = '*-1-FastGPS.csv', full.names = TRUE, recursive = TRUE, ignore.case = TRUE)
filedf = data.frame(fname = sapply(strsplit(files, '/'), '[[', 6), ptt = sapply(strsplit(files, '/'), '[[', 5))
# Filter by keeps
filedf = filedf[filedf$ptt %in% as.character(keeps), ]

fastlocs = WC_read_fastloc(files = filedf$fname, datadir = './data/L0/')
fastlocs$ptt = as.numeric(fastlocs$ptt)

# List all location data files
files = list.files("./data/L0/", pattern = '*-Locations.csv', recursive = TRUE, full.names = TRUE)

ptts = as.numeric(sapply(strsplit(x = files, split = '/'), "[[", 5))
length(unique(ptts))
fnames = sapply(strsplit(x = files, split = '/'), "[[", 6)
filedf = data.frame(ptt = ptts, fnames = fnames, fullnames = files)
filedf = filedf[as.numeric(filedf$ptt) %in% keeps, ]

# If there are both a plain -Locations.csv file and a [single digit]-Locations.csv file, keep the -1-Locations.csv file
fastlocFiles = filedf[grepl(pattern = '-\\d-Locations.csv', x = filedf$fnames), ]
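# e.g. '240184-1-Locations.csv' matches the '-\\d-Locations.csv' pattern; '240184-Locations.csv' does not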
argosonly = filedf[!filedf$ptt %in% fastlocFiles$ptt, ] # logical negation is safe even if no FastGPS files are found

fs = rbind(fastlocFiles, argosonly)
fs = fs[fs$fnames != '142351-Locations.csv', ]

# LOAD 61 TAGS - 2 failed to transmit
locs = WC_read_locs(files = fs$fullnames) # 142115
# Checks
length(unique(locs$ptt))
which(!fs$ptt %in% unique(locs$ptt))

############################################################################################
# MERGE BY TIME AND PTT TO GET FASTLOC ANCILLARY DATA
# Time match to merge in sat and GPS error data
storageL = list()
fastloc_ptts = fastlocFiles$ptt
for (i in 1:length(fastloc_ptts)){
id = fastloc_ptts[i]
tm1 = locs[locs$ptt == id, ]
tm1 = tm1[order(tm1$datetime),]
tm2 = fastlocs[fastlocs$ptt == id, ]
tm2 = tm2[order(tm2$datetime), ]
tm2$RowIndex = 1:nrow(tm2)
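# Nearest-in-time matching: the bin edges below are the midpoints between consecutive
# FastGPS timestamps (prefixed with -Inf), so findInterval() returns the index of the
# closest FastGPS record for each location row.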
tm1$timematch_idx = findInterval(x = tm1$datetime, vec = c(-Inf, head(tm2$datetime, -1)) + c(0, diff(tm2$datetime)/2))
tm1$timematch_idx[tm1$type == 'Argos'] = NA # only GPS-type rows should inherit FastGPS ancillary data

colnames(tm2)[9] = 'fastloc_datetime' # rename the FastGPS datetime column to avoid a name clash on join
tm2 = tm2[,c('ptt', 'fastloc_datetime','hauled.out', 'satellites', 'residual', 'time.error', 'RowIndex')]

merger = left_join(tm1, tm2, by = join_by(ptt, timematch_idx == RowIndex))

storageL[[i]] = merger

}

merged_fltags = do.call(rbind, storageL)

argosdata = locs[!locs$ptt %in% fastloc_ptts, ]

# Recombine the merged fastloc data and the argos only tags
locs_ = bind_rows(argosdata, merged_fltags)

##### ELIMINATE PRE-DEPLOYMENT AND POST-DEPLOYMENT DATA

# merge with meta to get the transmission cutoff dates

locs2 = left_join(locs_, meta, by = 'ptt') %>%
  filter(datetime >= cutoffstart, datetime <= cutoffend) %>%
  rename(lat = latitude, lon = longitude, smaj = error.semi.major.axis, smin = error.semi.minor.axis,
         lc = quality, eor = error.ellipse.orientation, id = ptt)


# 1379 locations eliminated: pre-deployment records and post-deployment transmissions from tags no longer on an animal
nrow(locs) - nrow(locs2)

# write out data
write_csv(x = locs2, file = './data/L1/locs/Hg_2019-2023_CombinedDeploymentLocs.csv')

# clean workspace
rm(list = setdiff(ls(), 'locs2'))

# Eliminate FastGPS positions that have both few satellites (< 6) and high residuals (> 30) (Dujon et al. 2014)
badgps = which(locs2$residual > 30 & locs2$satellites < 6)

if(length(badgps) > 0 ){
locs3 = locs2[-c(badgps), ]
}else{locs3 = locs2}


# Remove positions that are impossible regardless of LC class (rough NW Atlantic study-area bounding box)
locs4 = locs3[which(locs3$lon > -77 & locs3$lon <= -54 & locs3$lat >= 35.0),]

# Calculate percentage of location classes
table(locs4$lc, useNA = 'ifany')
round(table(locs4$lc, useNA = 'ifany') / nrow(locs4) * 100)

# Remove Zs
locs4 = locs4[locs4$lc != 'Z', ]
table(locs4$lc, useNA = 'ifany')
round(table(locs4$lc, useNA = 'ifany') / nrow(locs4) * 100)

# Get rid of NA
locs4 = locs4[!is.na(locs4$lc), ]

# remove duplicate id/datetime records (the first row per pair is kept after sorting)
locs4 = locs4 %>% arrange(id, datetime, desc(error.radius))

dupes = which(duplicated(locs4[,c('id', 'datetime')]))

# guard: locs4[-integer(0), ] would drop every row if no duplicates were found
if (length(dupes) > 0) {
  locs5 = locs4[-dupes, ]
} else {
  locs5 = locs4
}
table(locs5$lc, useNA = 'ifany')
length(unique(locs5$id))

# Remove those with comments indicating erroneous location
locs6 = locs5[is.na(locs5$comment), ]

table(locs6$type)

# GET NUMBERS FOR PRE-WINDFARM-CONSTRUCTION PERIOD
nrow(locs6[locs6$datetime < as.POSIXct(x = '2023-06-01 00:00:00', tz = 'UTC'),])
table(locs6$type[locs6$datetime < as.POSIXct(x = '2023-06-01 00:00:00', tz = 'UTC')])
length(unique(locs6$id))
# write out filtered data
write.csv(x = locs6, file = './data/L1/locs/Hg_2019-2023_Prefiltered_Locs.csv', row.names = F)

# SCRATCH
# A PRIORI SPEED FILTER

# APRIORI SPEED FILTER FROM ARGOS FILTER
# data = locs3
#
# ll = split(data, data$id)
#
# resL = lapply(X = ll, FUN = function(x, vmax = 10){
#
# Gs = x[x$lc == 'G', ]
# tmp = x[x$lc != 'G',]
# tmp = tmp[order(tmp$datetime), ]
# filt = argosfilter::sdafilter(lat = tmp$lat,
# lon = tmp$lon,
# dtime = tmp$datetime,
# lc = tmp$lc,
# vmax = vmax, ang = -1)
#
# dat = tmp[which(filt %in% c('not', 'end_location')), ]
# print('n removed:')
# print(nrow(tmp) - nrow(dat))
# fin = rbind(dat, Gs)
# fin = fin[order(fin$datetime), ]
#
# return(fin)
# rm(tmp, dat, fin)
#
# })

#locs_filt = bind_rows(resL)
#rm(ll, data)
