forked from icherkas/fcast
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDownload_Reanalysis_Data.py
70 lines (61 loc) · 2.96 KB
/
Download_Reanalysis_Data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# This script downloads the CHRTOUT reanalysis product from AWS.
# Documentation for the NOAA National Water Model Reanalysis data on AWS:
# https://docs.opendata.aws/nwm-archive/readme.html
# The CHRTOUT product holds the hourly streamflow values at the points associated with the flow lines.
# A directory called output needs to be created before running the script.
# The downloaded data will be located in a directory called output.
# The output directory will be in the same level as the script in the current working directory.
# This script can be used to download different products from the Reanalysis data.
# The Reanalysis data products are RTOUT, CHRTOUT, LAKEOUT, and LDASOUT
import boto3
import os
import pathlib as pl
#import io
import dask
import netCDF4
import xarray as xr
import pandas as pd
# Amazon Ops
def s3List(bucketName, prefixName, nameSelector, fileformat):
    '''
    Return the S3 URIs of the objects under a prefix that match a name filter.

    The bucket is listed with ``list_objects_v2``; the continuation token of
    each truncated response is passed to the next request, so the result is
    not limited to a single response page.

    BUCKETNAME -- Name of the S3 bucket to list.
    PREFIXNAME -- An S3 key prefix; only keys under it are listed.
    NAMESELECTOR -- A string that must occur in the key, e.g. 'CHRTOUT'.
    FILEFORMAT -- A string the key must end with, e.g. 'DOMAIN1.comp'.

    Returns a list of 's3://<bucket>/<key>' strings in the order the keys
    were listed. The list is empty when no key matches.
    '''
    # Set the Boto3 client
    s3_client = boto3.client('s3')
    selector = '{}'.format(nameSelector)
    request = {'Bucket': bucketName, 'Prefix': prefixName}
    pathsList = []
    while True:
        page = s3_client.list_objects_v2(**request)
        # 'Contents' is missing from the response when the page holds no
        # keys, so fall back to an empty list instead of raising KeyError.
        for elem in page.get('Contents', []):
            key = elem['Key']
            if selector in key and key.endswith(fileformat):
                pathsList.append('s3://' + bucketName + '/' + key)
        # Stop once the listing is complete; otherwise ask for the next page.
        if not page.get('IsTruncated'):
            break
        request['ContinuationToken'] = page['NextContinuationToken']
    return pathsList
if __name__ == '__main__':
    # Download every CHRTOUT file found under the prefix into ./output.
    s3client = boto3.client("s3")
    s3 = boto3.resource('s3')
    bucket = 'nwm-archive'
    prefix = '2003'
    output_dir = "./output"

    # Print the resource handle for the bucket/prefix being downloaded.
    s3Obj = s3.Object(bucket_name=bucket, key=prefix)
    print(s3Obj)

    datafiles = s3List(bucket, prefix, 'CHRTOUT', 'DOMAIN1.comp')

    # Create the target directory up front so the first download cannot fail
    # on a missing folder.
    os.makedirs(output_dir, exist_ok=True)

    uri_root = 's3://' + bucket + '/'
    for obj in datafiles:
        # Recover the real object key from the URI instead of rebuilding it
        # from the prefix: the prefix is a plain string match, so a listed
        # key is not guaranteed to be exactly '<prefix>/<filename>'.
        key = obj[len(uri_root):]
        filename = key.split("/")[-1]
        s3client.download_file(bucket, key, os.path.join(output_dir, filename))