Commit 127fae3

*shankar* initial commit

atreyasha committed Mar 25, 2020
Showing 18 changed files with 699 additions and 0 deletions.
38 changes: 38 additions & 0 deletions .gitignore
@@ -0,0 +1,38 @@
*png

# History files
.Rhistory
.Rapp.history

# Session Data files
.RData

# Example code in package build process
*-Ex.R

# Output files from R CMD build
/*.tar.gz

# Output files from R CMD check
/*.Rcheck/

# RStudio files
.Rproj.user/

# produced vignettes
vignettes/*.html
vignettes/*.pdf

# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
.httr-oauth

# knitr and R markdown default cache directories
/*_cache/
/cache/

# Temporary files created by R markdown
*.utf8.md
*.knit.md

# Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html
rsconnect/
49 changes: 49 additions & 0 deletions README.md
@@ -0,0 +1,49 @@
## flox_ramses

A joint project to organize and calculate key remote-sensing output metrics. Input data were collected during fieldwork; the next task involves filtering the data and calculating key optical coefficients.

To use this repository for testing and submitting pull requests, it is advised to enable a pre-commit hook that keeps Python dependencies in `requirements.txt` up to date:

```shell
$ ./init.sh
```
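
For reference, below is a minimal sketch of what such a hook could look like; this assumes `init.sh` installs something equivalent to the following as `.git/hooks/pre-commit` (the actual contents of `init.sh` may differ):

```shell
#!/bin/sh
# hypothetical pre-commit hook: regenerate requirements.txt and stage it
pip3 freeze > requirements.txt
git add requirements.txt
```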

### 1. Aggregating flox data

To aggregate raw flox data into a CSV file, place the relevant flox data inside `./data/flox` and execute the following:

```shell
$ Rscript aggregate_flox.R
```

The corresponding output will be saved in the `./out` directory as `aggregateFine.csv` and `aggregateFull.csv`.
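
Based on the aggregation script, each output row corresponds to one spectral channel of one measurement, with columns: group index `i`, measurement index `j`, `date`, `time`, `lat`, `lon`, wavelength `wl`, the fractions `luFrac`, `lskyFrac`, `edFrac1`, `edFrac2`, and two results computed as `(luFrac - lskyFrac) / edFrac1` and `(luFrac - lskyFrac) / edFrac2`.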

### 2. Parsing RAMSES data and writing to JSON

The script `dat2json.py` parses a RAMSES text output file (read from `./data/ramses`) into a Python dictionary and writes it to the `./out` directory as a JSON file.

```
usage: dat2json.py [-h] [--out OUT] -i INPUT
optional arguments:
-h, --help show this help message and exit
--out OUT name of output json <default:'out'>
required named arguments:
-i INPUT, --input INPUT
name of input file, eg. 'ramses.dat'
```
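
For example, assuming an input file `./data/ramses/ramses.dat` (a hypothetical name, as in the help text above):

```shell
$ python3 dat2json.py -i ramses.dat --out pond_comparison
```

This would produce `./out/pond_comparison.json`, in which each `IDData` entry maps to a dictionary of header fields plus a `data` list containing the parsed spectral rows.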

### 3. Visualization of pond-based results

`figure_vis.R` contains a workflow to plot pond-based results after preprocessing a JSON file from `dat2json.py`.

`temporal_slice_vis.R` contains a workflow to plot pond-based results after preprocessing based on temporal slices; see the invocation sketch below.
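
Assuming both scripts follow the same invocation pattern as `aggregate_flox.R`, they can presumably be run as:

```shell
$ Rscript figure_vis.R
$ Rscript temporal_slice_vis.R
```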

### 4. Changelog

Proposed changes to our workflow are highlighted in `todos.md`.

### Authors

Atreya Shankar, Remika Gupana
163 changes: 163 additions & 0 deletions aggregate_flox.R
@@ -0,0 +1,163 @@
#!/usr/bin/env Rscript
# -*- coding: utf-8 -*-

###########################
# source key functions
###########################

source("./aux/cleanUp.R", encoding = "UTF-8")
source("./aux/nLines.R", encoding = "UTF-8")
source("./aux/disect.R", encoding = "UTF-8")

###########################
# create layers
# create source functions
###########################

# note: file selection below is specific to the most recent run;
# manually drop files that should not enter this analysis
megalist <- list.files(paste(getwd(), "/data/flox", sep=""), full.names=TRUE)
megalist <- megalist[-c(1:3)]
megalist <- megalist[-c(5,6,55,56)]
myList <- list()
final.results.fine <- list()
final.results.full <- list()
# constant reflectance factor applied to the downwelling sky radiance
p <- 0.0245
# group files in blocks of four: (lsky fine, lsky full, lu fine, lu full)
for(i in 1:(length(megalist)/4)){
myList[[i]] <- megalist[c((4*i-3):(4*i))]
}

###########################
# create variables
###########################

cal.fine <- read.csv(paste(getwd(),"/data/flox/00_CAL_Fine.csv", sep=""), sep = ";", header = F, stringsAsFactors = FALSE)
cal.fine <- cleanUp(cal.fine)
cal.full <- read.csv(paste(getwd(),"/data/flox/00_CAL_Full.csv", sep=""), sep = ";", header = F, stringsAsFactors = FALSE)
cal.full <- cleanUp(cal.full)

###########################
# create workflows per 4 main entries in megalist
###########################

pb.overall <- txtProgressBar(min = 0, max = length(myList), initial = 0, char = "=",
width = options()$width, style = 3, file = "")
start.time <- Sys.time()
for(i in 1:length(myList)){
result.fine <- data.frame(matrix(ncol = 13))
names(result.fine) <- c("luFrac", "lskyFrac", "edFrac1", "edFrac2", "result.fine.1", "result.fine.2", "wl", "date", "time", "lat", "lon", "i", "j")
result.full <- data.frame(matrix(ncol = 13))
names(result.full) <- c("luFrac", "lskyFrac", "edFrac1", "edFrac2", "result.full.1", "result.full.2", "wl", "date", "time", "lat", "lon", "i", "j")
# choose between the lsky of this group and that of the next group, whichever
# is closer in GPS time to the lu measurement; new lsky possibilities can be
# added here, except for the last group, which defaults to its own lsky
lsky.fine <- disect(myList[[i]][1])
lsky.full <- disect(myList[[i]][2])
lu.fine <- disect(myList[[i]][3])
lu.full <- disect(myList[[i]][4])
if(i < length(myList)){
lsky.fine2 <- disect(myList[[i+1]][1])
lsky.full2 <- disect(myList[[i+1]][2])
# test for fine
orig.fine <- as.numeric(lu.fine[[2]][[2]][which(lu.fine[[2]][[1]] == "GPS_TIME_UTC=")+1,])
session.fine <- as.numeric(lsky.fine[[2]][[length(lsky.fine[[2]])]][which(lsky.fine[[2]][[1]] == "GPS_TIME_UTC=")+1,])
session.fine2 <- as.numeric(lsky.fine2[[2]][[2]][which(lsky.fine2[[2]][[1]] == "GPS_TIME_UTC=")+1,])
check.fine <- c(abs(orig.fine-session.fine), abs(orig.fine-session.fine2))
checkIndex.fine <- which.min(check.fine)
# test for full
orig.full <- as.numeric(lu.full[[2]][[2]][which(lu.full[[2]][[1]] == "GPS_TIME_UTC=")+1,])
session.full <- as.numeric(lsky.full[[2]][[length(lsky.full[[2]])]][which(lsky.full[[2]][[1]] == "GPS_TIME_UTC=")+1,])
session.full2 <- as.numeric(lsky.full2[[2]][[2]][which(lsky.full2[[2]][[1]] == "GPS_TIME_UTC=")+1,])
check.full <- c(abs(orig.full-session.full), abs(orig.full-session.full2))
checkIndex.full <- which.min(check.full)
} else {
checkIndex.fine <- 1
checkIndex.full <- 1
}
w <- length(final.results.fine)
# run pipeline for fine: j indexes measurements, k the 1024 spectral channels
for(j in 1:min(c(length(lu.fine[[1]]), length(lsky.fine[[1]])))){
for(k in 1:1024){
lufrac <- (lu.fine[[1]][[j]][k,2]-lu.fine[[1]][[j]][k,5])/(as.numeric(lu.fine[[2]][[j]][which(lu.fine[[2]][[j]] == "IT_VEG[us]=")+1,])*cal.fine[k,2])
result.fine[k,1] <- lufrac
if(checkIndex.fine == 1){
lskyfrac <- ((p*(lsky.fine[[1]][[length(lsky.fine[[2]])]][k,2]-lsky.fine[[1]][[length(lsky.fine[[2]])]][k,5]))/(as.numeric(lsky.fine[[2]][[length(lsky.fine[[2]])]][which(lsky.fine[[2]][[length(lsky.fine[[2]])]] == "IT_VEG[us]=")+1,])*cal.fine[k,2]))
} else if(checkIndex.fine == 2){
lskyfrac <- ((p*(lsky.fine2[[1]][[2]][k,2]-lsky.fine2[[1]][[2]][k,5]))/(as.numeric(lsky.fine2[[2]][[2]][which(lsky.fine2[[2]][[2]] == "IT_VEG[us]=")+1,])*cal.fine[k,2]))
}
result.fine[k,2] <- lskyfrac
edfrac1 <- (lu.fine[[1]][[j]][k,1]-lu.fine[[1]][[j]][k,4])/(as.numeric(lu.fine[[2]][[j]][which(lu.fine[[2]][[j]] == "IT_WR[us]=")+1,])*cal.fine[k,3])
result.fine[k,3] <- edfrac1
edfrac2 <- (lu.fine[[1]][[j]][k,3]-lu.fine[[1]][[j]][k,4])/(as.numeric(lu.fine[[2]][[j]][which(lu.fine[[2]][[j]] == "IT_WR[us]=")+1,])*cal.fine[k,3])
result.fine[k,4] <- edfrac2
result1 <- (lufrac-lskyfrac)/edfrac1
result.fine[k,5] <- result1
result2 <- (lufrac-lskyfrac)/edfrac2
result.fine[k,6] <- result2
result.fine[k,7] <- cal.fine[k,1]
date <- lu.fine[[2]][[j]][which(lu.fine[[2]][[j]] == "GPS_date=")+1,]
time <- lu.fine[[2]][[j]][which(lu.fine[[2]][[j]] == "GPS_TIME_UTC=")+1,]
result.fine[k,8] <- date
result.fine[k,9] <- time
lat <- lu.fine[[2]][[j]][which(lu.fine[[2]][[j]] == "GPS_lat=")+1,]
result.fine[k,10] <- lat
lon <- lu.fine[[2]][[j]][which(lu.fine[[2]][[j]] == "GPS_lon=")+1,]
result.fine[k,11] <- lon
result.fine[k,12] <- i
result.fine[k,13] <- j
}
m <- j + w
final.results.fine[[m]] <- result.fine
}
w <- length(final.results.full)
# run pipeline for full: j indexes measurements, k the 1024 spectral channels
for(j in 1:min(c(length(lu.full[[1]]), length(lsky.full[[1]])))){
for(k in 1:1024){
lufrac <- (lu.full[[1]][[j]][k,2]-lu.full[[1]][[j]][k,5])/(as.numeric(lu.full[[2]][[j]][which(lu.full[[2]][[j]] == "IT_VEG[us]=")+1,])*cal.full[k,2])
result.full[k,1] <- lufrac
if(checkIndex.full == 1){
lskyfrac <- ((p*(lsky.full[[1]][[length(lsky.full[[2]])]][k,2]-lsky.full[[1]][[length(lsky.full[[2]])]][k,5]))/(as.numeric(lsky.full[[2]][[length(lsky.full[[2]])]][which(lsky.full[[2]][[length(lsky.full[[2]])]] == "IT_VEG[us]=")+1,])*cal.full[k,2]))
} else if(checkIndex.full == 2){
lskyfrac <- ((p*(lsky.full2[[1]][[2]][k,2]-lsky.full2[[1]][[2]][k,5]))/(as.numeric(lsky.full2[[2]][[2]][which(lsky.full2[[2]][[2]] == "IT_VEG[us]=")+1,])*cal.full[k,2]))
}
result.full[k,2] <- lskyfrac
edfrac1 <- (lu.full[[1]][[j]][k,1]-lu.full[[1]][[j]][k,4])/(as.numeric(lu.full[[2]][[j]][which(lu.full[[2]][[j]] == "IT_WR[us]=")+1,])*cal.full[k,3])
result.full[k,3] <- edfrac1
edfrac2 <- (lu.full[[1]][[j]][k,3]-lu.full[[1]][[j]][k,4])/(as.numeric(lu.full[[2]][[j]][which(lu.full[[2]][[j]] == "IT_WR[us]=")+1,])*cal.full[k,3])
result.full[k,4] <- edfrac2
result1 <- (lufrac-lskyfrac)/edfrac1
result.full[k,5] <- result1
result2 <- (lufrac-lskyfrac)/edfrac2
result.full[k,6] <- result2
result.full[k,7] <- cal.full[k,1]
date <- lu.full[[2]][[j]][which(lu.full[[2]][[j]] == "GPS_date=")+1,]
time <- lu.full[[2]][[j]][which(lu.full[[2]][[j]] == "GPS_TIME_UTC=")+1,]
result.full[k,8] <- date
result.full[k,9] <- time
lat <- lu.full[[2]][[j]][which(lu.full[[2]][[j]] == "GPS_lat=")+1,]
result.full[k,10] <- lat
lon <- lu.full[[2]][[j]][which(lu.full[[2]][[j]] == "GPS_lon=")+1,]
result.full[k,11] <- lon
result.full[k,12] <- i
result.full[k,13] <- j
}
m <- j + w
final.results.full[[m]] <- result.full
}
Sys.sleep(1/1000)
setTxtProgressBar(pb.overall, i, title = NULL, label = NULL)
}
end.time <- Sys.time()
end.time - start.time
close(pb.overall)

###########################
# aggregation and writing
###########################

# aggregate lists
aggregate.fine <- do.call("rbind", final.results.fine)
aggregate.full <- do.call("rbind", final.results.full)
# reorder columns: i, j, date, time, lat, lon, wl, fractions, results
aggregate.fine <- aggregate.fine[c(12,13,8:11,7,1:6)]
aggregate.full <- aggregate.full[c(12,13,8:11,7,1:6)]
# write to file
write.csv(aggregate.fine, "./out/aggregateFine.csv", row.names = FALSE)
write.csv(aggregate.full, "./out/aggregateFull.csv", row.names = FALSE)
10 changes: 10 additions & 0 deletions aux/cleanUp.R
@@ -0,0 +1,10 @@
# transpose a raw calibration table so that its first column becomes the header
cleanUp <- function(y){
x <- y[,1]
y <- as.data.frame(t(y[,-1]))
colnames(y) <- x
rownames(y) <- NULL
if(any(rowSums(is.na(y)) > 0)) {
stop("error, please remove NA-filled rows and try again")
}
return(y)
}
20 changes: 20 additions & 0 deletions aux/disect.R
@@ -0,0 +1,20 @@
# split a flox file into per-record value tables and details/header tables
disect <- function(file){
myValues <- list()
myDetails <- list()
# each record spans six lines: one details line followed by five value lines
n.records <- nLines(file)/6
for(i in 1:n.records){
a <- (6*i-5)
b <- (6*i-6)
myValues[[i]] <- read.csv(file, sep = ";", skip = a, nrows = 5, header = F, stringsAsFactors = FALSE)
x <- myValues[[i]][,1]
myValues[[i]] <- as.data.frame(t(myValues[[i]][,-1]))
colnames(myValues[[i]]) <- x
myValues[[i]] <- myValues[[i]][-which(rowSums(is.na(myValues[[i]])) == ncol(myValues[[i]])),]
rownames(myValues[[i]]) <- NULL
myDetails[[i]] <- read.csv(file, sep = ";", skip = b, nrows = 1, header = F, stringsAsFactors = FALSE)
myDetails[[i]] <- as.data.frame(t(myDetails[[i]]), stringsAsFactors = FALSE)
rownames(myDetails[[i]]) <- NULL
}
result <- list(myValues, myDetails)
return(result)
}
10 changes: 10 additions & 0 deletions aux/nLines.R
@@ -0,0 +1,10 @@
# count the number of lines in a file by reading it in chunks
nLines <- function(file){
testcon <- file(file,open="r")
readsizeof <- 20000
nooflines <- 0
while((linesread <- length(readLines(testcon,readsizeof))) > 0){
nooflines <- nooflines+linesread
}
close(testcon)
return(nooflines)
}
24 changes: 24 additions & 0 deletions aux/test.py
@@ -0,0 +1,24 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# load dependencies
import csv
import json

# load json into dictionary in memory
with open("../out/pond_comparison.json", "r") as f:
    hold = json.load(f)

# keep only entries from the relevant devices
rel = {}
for key in hold.keys():
    if hold[key]["IDDevice"] in ["SAM_8623", "SAM_8624", "SAM_8622"]:
        rel[key] = hold[key]

# flatten each data row into [device, datetime, *row]
final = [[rel[key]["IDDevice"], rel[key]["DateTime"]] + el
         for key in rel.keys() for el in rel[key]["data"]]

# write to output file; newline="" avoids blank rows on Windows
with open("../out/sample.csv", "w", newline="") as f:
    w = csv.writer(f)
    w.writerows(final)
62 changes: 62 additions & 0 deletions dat2json.py
@@ -0,0 +1,62 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# load dependencies
import re
import json
import argparse
from tqdm import tqdm

##############################
# define main function
##############################

def convertJSON(infile, outfile="out"):
    # initialize variables
    json_dict = {}
    data = False
    # find total lines in file for the tqdm bar
    # adapted from https://stackoverflow.com/a/55188797
    with open("./data/ramses/" + infile, "r") as f:
        num_lines = sum(1 for line in f)
    # start reading lines and sequentially parsing
    with open("./data/ramses/" + infile, "r") as f:
        for line in tqdm(f, total=num_lines):
            if len(re.findall(r"IDData\s+", line)) == 1:
                # trigger to create new tree branch
                el = re.split(r"\s*=\s*", line.strip())
                json_dict[el[1]] = {}
                reserve = el[1]
            elif "=" in line and "Version" not in line:
                # trigger to add leaves
                el = re.split(r"\s*=\s*", line.strip())
                json_dict[reserve][el[0]] = el[1]
            elif "[END]" not in line and "[DATA]" in line:
                # trigger to create data branch and accumulate intermediate list
                data = True
                int_ls = []
            elif "[END]" in line and "[DATA]" in line:
                # trigger to stop accumulation and deposit list
                data = False
                json_dict[reserve]["data"] = int_ls
            elif data:
                # trigger to append data lines to intermediate list
                int_ls.append(re.split(r"\s+", line.strip()))
    # write parsed dictionary to json
    with open("./out/" + outfile + ".json", "w") as f:
        json.dump(json_dict, f)

##############################
# main command call
##############################

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--out", type=str, default="out",
                        help="name of output json <default:'out'>")
    requiredNamed = parser.add_argument_group('required named arguments')
    requiredNamed.add_argument('-i', '--input', type=str,
                               help="name of input file, eg. 'ramses.dat'",
                               required=True)
    args = parser.parse_args()
    convertJSON(args.input, args.out)
4 changes: 4 additions & 0 deletions data/.gitignore
@@ -0,0 +1,4 @@
*.dat
*.csv
*.zip
2019*
1 change: 1 addition & 0 deletions data/flox/readme.txt
@@ -0,0 +1 @@
directory for flox data
1 change: 1 addition & 0 deletions data/ramses/readme.txt
@@ -0,0 +1 @@
directory for ramses data