Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
leroybird committed May 5, 2021
1 parent 4e80fc6 commit cdce808
Show file tree
Hide file tree
Showing 25 changed files with 1,793 additions and 0 deletions.
25 changes: 25 additions & 0 deletions DockerImages/01-Base/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Base image: CentOS 8.2.2004 (pinned tag)
# NOTE(review): CentOS 8 is end-of-life; confirm the dnf repositories used
# below are still reachable from this tag before rebuilding.
FROM centos:centos8.2.2004
# Remember to build and tag this image as uaqh/base

# Install dependencies.
# All dnf steps run in a single layer and the dnf cache is removed in that
# same layer, so the package cache is never stored in the image.
#  - epel-release and the config-manager plugin are installed first,
#  - then the PowerTools repository is enabled,
#  - then R and the development headers are installed.
RUN dnf -y install epel-release 'dnf-command(config-manager)' \
    && dnf config-manager --set-enabled PowerTools \
    && dnf -y install \
        R \
        netcdf-devel \
        libjpeg-turbo-devel \
        openssl-devel \
        libxml2-devel \
        libcurl-devel \
    && dnf clean all

# Create the folder that holds the installation script
RUN mkdir -p /install
# Copy the R package installation script into the image
COPY install_packages.R /install/install_packages.R

# Install the R packages listed in install_packages.R
RUN Rscript /install/install_packages.R
# Start BASH for testing and tidying
CMD ["/bin/bash"]
17 changes: 17 additions & 0 deletions DockerImages/01-Base/install_packages.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Install the R packages that are baked into the base image.

# Set the CRAN mirror explicitly so install.packages() never has to prompt
repos <- getOption("repos")
repos["CRAN"] <- "https://cloud.r-project.org"
options(repos = repos)

# Packages to install
pkgs <- c(
  "readr",
  "RNetCDF",
  "openair",
  "doParallel",
  "aws.s3",
  "stringr",
  "reshape2",
  "fitdistrplus",
  "ggplot2",
  "lubridate"
)

install.packages(pkgs)

# install.packages() only raises a warning when a package cannot be
# installed, so the script (and the Docker build step running it) would
# still exit successfully. Fail loudly if anything is missing.
missing_pkgs <- setdiff(pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  stop(
    "Failed to install R package(s): ",
    paste(missing_pkgs, collapse = ", "),
    call. = FALSE
  )
}

14 changes: 14 additions & 0 deletions DockerImages/02-Upload2SS3/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Base image: uaqh/base (must be built locally beforehand)
FROM uaqh/base:latest
# Remember to build uaqh/mapm-uncertainties
# NOTE(review): the image name in the reminder above may be stale for this
# upload image - confirm the intended tag.

# Copy the upload scripts to the image root
COPY ES642_data2s3.R /ES642_data2s3.R
COPY ODIN_data2s3.R /ODIN_data2s3.R
COPY TEOM_data2s3.R /TEOM_data2s3.R
COPY upload_data.R /upload_data.R
# SECURITY NOTE(review): this stores the AWS credentials in an image layer,
# readable by anyone who can pull the image. Consider providing them at run
# time (bind mount or environment variables) instead.
COPY secret_aws.txt /secret_aws.txt

# Run the upload script.
# Exec form is used so that Rscript is started directly (no wrapping shell)
# and receives container stop signals itself. The script name is resolved
# relative to the container working directory.
CMD ["Rscript", "upload_data.R"]
179 changes: 179 additions & 0 deletions DockerImages/02-Upload2SS3/ES642_data2s3.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
# Move data to an S3 bucket

library(readr)
library(aws.s3)
library(RNetCDF)
# Read the AWS credentials from a semicolon-delimited file without a header
# row; readr names the columns X1, X2, ...
aws_secrets <- read_delim(
  file = "./secret_aws.txt",
  col_names = FALSE,
  delim = ";"
)

# Expose the credentials (column X1 = access key id, column X2 = secret
# access key) and the region through environment variables
Sys.setenv(
  "AWS_DEFAULT_REGION" = "ap-southeast-2",
  "AWS_ACCESS_KEY_ID" = aws_secrets[["X1"]],
  "AWS_SECRET_ACCESS_KEY" = aws_secrets[["X2"]]
)

# ES642 RAW colocation 1 #######
# Upload every raw NetCDF file of colocation 1 to its S3 bucket.
file_in_path <- "./data/ES642/Colocation_1/Raw/NetCDF/"
# `pattern` is a regular expression: anchor it so only names ending in ".nc"
# are listed (a bare "nc" matches any file name that merely contains "nc")
nc_files <- dir(file_in_path, full.names = FALSE, pattern = "\\.nc$")

# Create the bucket if it does not exist yet
bucket_name <- "mapm-es642-raw-colo1"
if (!bucket_exists(bucket_name)[1]) {
  put_bucket(bucket_name)
}

# Upload each file; the object key is the bare file name
for (nc_file in nc_files) {
  put_object(
    file = paste0(file_in_path, nc_file),
    object = nc_file,
    bucket = bucket_name
  )
}

# ES642 RAW colocation 2 #######
# Upload every raw NetCDF file of colocation 2 to its S3 bucket.
file_in_path <- "./data/ES642/Colocation_2/Raw/NetCDF/"
# `pattern` is a regular expression: anchor it so only names ending in ".nc"
# are listed (a bare "nc" matches any file name that merely contains "nc")
nc_files <- dir(file_in_path, full.names = FALSE, pattern = "\\.nc$")

# Create the bucket if it does not exist yet
bucket_name <- "mapm-es642-raw-colo2"
if (!bucket_exists(bucket_name)[1]) {
  put_bucket(bucket_name)
}

# Upload each file; the object key is the bare file name
for (nc_file in nc_files) {
  put_object(
    file = paste0(file_in_path, nc_file),
    object = nc_file,
    bucket = bucket_name
  )
}

# ES642 v01 colocation 1 #######
# Build the v01 files for colocation 1 and upload them: every raw file is
# copied to the v01 folder, the pm2.5 variable of the copy is rewritten as
# a * pm2.5 + b with the coefficients of the matching device, and the copy
# is uploaded to the v01 bucket.
file_in_path <- "./data/ES642/Colocation_1/Raw/NetCDF/"
file_out_path <- "./data/ES642/Colocation_1/v01/NetCDF/"
# `pattern` is a regular expression: anchor it so only names ending in ".nc"
# are listed (a bare "nc" matches any file name that merely contains "nc")
nc_files_in <- dir(file_in_path, full.names = FALSE, pattern = "\\.nc$")
# Output names: first occurrence of "raw" in the file name becomes "v01"
nc_files <- stringr::str_replace(nc_files_in, "raw", "v01")
# file.copy() does not create missing folders, so make sure the target exists
dir.create(file_out_path, recursive = TRUE, showWarnings = FALSE)
file.copy(
  paste0(file_in_path, nc_files_in),
  paste0(file_out_path, nc_files),
  overwrite = TRUE
)
v01.correction <- read_delim(
  "./data/v01_ES642_coefficients.txt",
  delim = "\t",
  col_names = TRUE
)
# Keep column 2 (device) and columns 3 and 4, used below as slope `a` and
# intercept `b`
v01.correction <- v01.correction[, c(2, 3, 4)]
names(v01.correction) <- c("device", "a", "b")

# Create the bucket if it does not exist yet
bucket_name <- "mapm-es642-v01-colo1"
if (!bucket_exists(bucket_name)[1]) {
  put_bucket(bucket_name)
}

for (nc_file in nc_files) {
  # Device name used in the coefficients table: "ES642" followed by
  # characters 8 to 50 of the file name part preceding "_Christchurch"
  curr_dev <- paste0(
    "ES642",
    substr(stringr::str_split(nc_file, "_Christchurch")[[1]][1], 8, 50)
  )
  id_dev <- which(v01.correction$device == curr_dev)
  if (length(id_dev) > 0) {
    # Use the first matching row so that `a` and `b` are always scalars
    id_dev <- id_dev[1]
    a <- v01.correction$a[id_dev]
    b <- v01.correction$b[id_dev]
    # Missing coefficients: fall back to the identity correction. (The
    # original test was inverted: it discarded valid coefficients and applied
    # NA ones, which turned the whole pm2.5 series into NA.)
    if (is.na(a) || is.na(b)) {
      a <- 1
      b <- 0
    }
    # Open the copied NetCDF file for writing
    curr_nc <- open.nc(paste0(file_out_path, nc_file), write = TRUE)
    # Only the pm2.5 variable is corrected
    raw_pm2.5 <- var.get.nc(curr_nc, "pm2.5")
    v01_pm2.5 <- a * raw_pm2.5 + b
    var.put.nc(
      ncfile = curr_nc,
      variable = "pm2.5",
      data = v01_pm2.5
    )
    sync.nc(curr_nc)
    close.nc(curr_nc)
  }
  # Files without a matching device are uploaded as plain copies of the raw
  # file
  put_object(
    file = paste0(file_out_path, nc_file),
    object = nc_file,
    bucket = bucket_name
  )
}

# ES642 v01 colocation 2 #######
# Build the v01 files for colocation 2 and upload them: every raw file is
# copied to the v01 folder, the pm2.5 variable of the copy is rewritten as
# a * pm2.5 + b with the coefficients of the matching device, and the copy
# is uploaded to the v01 bucket.
# The paths point at Colocation_2: the original section read and overwrote
# the Colocation_1 folders while uploading to the colo2 bucket with the
# colocation 2 coefficient columns.
file_in_path <- "./data/ES642/Colocation_2/Raw/NetCDF/"
file_out_path <- "./data/ES642/Colocation_2/v01/NetCDF/"
# `pattern` is a regular expression: anchor it so only names ending in ".nc"
# are listed (a bare "nc" matches any file name that merely contains "nc")
nc_files_in <- dir(file_in_path, full.names = FALSE, pattern = "\\.nc$")
# Output names: first occurrence of "raw" in the file name becomes "v01"
nc_files <- stringr::str_replace(nc_files_in, "raw", "v01")
# file.copy() does not create missing folders, so make sure the target exists
dir.create(file_out_path, recursive = TRUE, showWarnings = FALSE)
file.copy(
  paste0(file_in_path, nc_files_in),
  paste0(file_out_path, nc_files),
  overwrite = TRUE
)
v01.correction <- read_delim(
  "./data/v01_ES642_coefficients.txt",
  delim = "\t",
  col_names = TRUE
)
# Keep column 2 (device) and columns 5 and 6, used below as slope `a` and
# intercept `b`
v01.correction <- v01.correction[, c(2, 5, 6)]
names(v01.correction) <- c("device", "a", "b")

# Create the bucket if it does not exist yet
bucket_name <- "mapm-es642-v01-colo2"
if (!bucket_exists(bucket_name)[1]) {
  put_bucket(bucket_name)
}

for (nc_file in nc_files) {
  # Device name used in the coefficients table: "ES642" followed by
  # characters 8 to 50 of the file name part preceding "_Christchurch"
  curr_dev <- paste0(
    "ES642",
    substr(stringr::str_split(nc_file, "_Christchurch")[[1]][1], 8, 50)
  )
  id_dev <- which(v01.correction$device == curr_dev)
  if (length(id_dev) > 0) {
    # Use the first matching row so that `a` and `b` are always scalars
    id_dev <- id_dev[1]
    a <- v01.correction$a[id_dev]
    b <- v01.correction$b[id_dev]
    # Missing coefficients: fall back to the identity correction. (The
    # original test was inverted: it discarded valid coefficients and applied
    # NA ones, which turned the whole pm2.5 series into NA.)
    if (is.na(a) || is.na(b)) {
      a <- 1
      b <- 0
    }
    # Open the copied NetCDF file for writing
    curr_nc <- open.nc(paste0(file_out_path, nc_file), write = TRUE)
    # Only the pm2.5 variable is corrected
    raw_pm2.5 <- var.get.nc(curr_nc, "pm2.5")
    v01_pm2.5 <- a * raw_pm2.5 + b
    var.put.nc(
      ncfile = curr_nc,
      variable = "pm2.5",
      data = v01_pm2.5
    )
    sync.nc(curr_nc)
    close.nc(curr_nc)
  }
  # Files without a matching device are uploaded as plain copies of the raw
  # file
  put_object(
    file = paste0(file_out_path, nc_file),
    object = nc_file,
    bucket = bucket_name
  )
}


# ES642 RAW deployment #######
# Upload every raw NetCDF file of the deployment to its S3 bucket.
file_in_path <- "./data/ES642/Deployment/Raw/NetCDF/"
# `pattern` is a regular expression: anchor it so only names ending in ".nc"
# are listed (a bare "nc" matches any file name that merely contains "nc")
nc_files <- dir(file_in_path, full.names = FALSE, pattern = "\\.nc$")

# Create the bucket if it does not exist yet
bucket_name <- "mapm-es642-raw-deployment"
if (!bucket_exists(bucket_name)[1]) {
  put_bucket(bucket_name)
}

# Upload each file with multipart upload enabled; the object key is the bare
# file name
for (nc_file in nc_files) {
  put_object(
    file = paste0(file_in_path, nc_file),
    object = nc_file,
    bucket = bucket_name,
    multipart = TRUE
  )
}

# ES642 v01 deployment #######
# Upload the existing v01 NetCDF files of the deployment to their S3 bucket
# (no correction is computed here; the files are uploaded as found).
# NOTE(review): this folder is spelled "v_01" while the colocation sections
# use "v01" - confirm the folder name on disk.
file_in_path <- "./data/ES642/Deployment/v_01/NetCDF/"
# `pattern` is a regular expression: anchor it so only names ending in ".nc"
# are listed (a bare "nc" matches any file name that merely contains "nc")
nc_files <- dir(file_in_path, full.names = FALSE, pattern = "\\.nc$")

# Create the bucket if it does not exist yet
bucket_name <- "mapm-es642-v01-deployment"
if (!bucket_exists(bucket_name)[1]) {
  put_bucket(bucket_name)
}

# Upload each file with multipart upload enabled; the object key is the bare
# file name
for (nc_file in nc_files) {
  put_object(
    file = paste0(file_in_path, nc_file),
    object = nc_file,
    bucket = bucket_name,
    multipart = TRUE
  )
}

Loading

0 comments on commit cdce808

Please sign in to comment.