diff --git a/gips/data/core.py b/gips/data/core.py index c2c8d09c..48828c30 100644 --- a/gips/data/core.py +++ b/gips/data/core.py @@ -420,7 +420,7 @@ def _archivefile(cls, filename, update=False): newfilename = os.path.join(tpath, bname) if not os.path.exists(newfilename): # check if another asset exists - existing = cls.discover(asset.tile, d, asset.asset) + existing = cls.discover(asset.tile, d, asset.asset) if(len(existing) > 0 and (not update or not existing[0].updated(asset))): VerboseOut('%s: other version(s) already exists:' % bname, 1) @@ -792,16 +792,16 @@ def products2assets(cls, products): return set(assets) @classmethod - def fetch(cls, products, tiles, textent): - """ Download data for tiles and add to archive """ + def fetch(cls, products, tiles, textent, update=False): + """ Download data for tiles and add to archive. update forces fetch """ assets = cls.products2assets(products) fetched = [] for a in assets: for t in tiles: asset_dates = cls.Asset.dates(a, t, textent.datebounds, textent.daybounds) for d in asset_dates: - # if we don't have it already - if not cls.Asset.discover(t, d, a): + # if we don't have it already, or if update (force) flag + if not cls.Asset.discover(t, d, a) or update == True: try: cls.Asset.fetch(a, t, d) fetched.append((a, t, d)) diff --git a/gips/data/modis/modis.py b/gips/data/modis/modis.py index 03c77561..b455cc72 100755 --- a/gips/data/modis/modis.py +++ b/gips/data/modis/modis.py @@ -70,13 +70,13 @@ class modisAsset(Asset): _assets = { 'MCD43A4': { - 'pattern': 'MCD43A4*hdf', + 'pattern': 'MCD43A4.A???????.h??v??.???.?????????????.hdf', 'url': 'http://e4ftl01.cr.usgs.gov/MOTA/MCD43A4.006', 'startdate': datetime.date(2000, 2, 18), 'latency': -15 }, 'MCD43A2': { - 'pattern': 'MCD43A2*hdf', + 'pattern': 'MCD43A2.A???????.h??v??.???.?????????????.hdf', 'url': 'http://e4ftl01.cr.usgs.gov/MOTA/MCD43A2.006', 'startdate': datetime.date(2000, 2, 18), 'latency': -15 @@ -150,15 +150,23 @@ class modisAsset(Asset): def __init__(self, filename): """ Inspect a single file and get some metadata """ super(modisAsset, self).__init__(filename) - bname = os.path.basename(filename) - self.asset = bname[0:7] - self.tile = bname[17:23] - year = bname[9:13] - doy = bname[13:16] + bname = os.path.basename(filename) + parts = bname.split('.') + + self.asset = parts[0] + self.tile = parts[2] + self.sensor = parts[0][:3] + + year = parts[1][1:5] + doy = parts[1][5:8] self.date = datetime.datetime.strptime(year + doy, "%Y%j").date() - self.sensor = bname[:3] + collection = int(parts[3]) + file_version = int(parts[4]) + self.version = float('{}.{}'.format(collection, file_version)) + + @classmethod def fetch(cls, asset, tile, date): #super(modisAsset, cls).fetch(asset, tile, date) @@ -209,7 +217,16 @@ def fetch(cls, asset, tile, date): #raise Exception('Unable to find remote match for %s at %s' % (pattern, mainurl)) VerboseOut('Unable to find remote match for %s at %s' % (pattern, mainurl), 4) - + def updated(self, newasset): + ''' + Compare the version for this to that of newasset. + Return true if newasset version is greater. + ''' + return (self.sensor == newasset.sensor and + self.tile == newasset.tile and + self.date == newasset.date and + self.version < newasset.version) + class modisData(Data): """ A tile of data (all assets and products) """ name = 'Modis' @@ -291,7 +308,8 @@ def process(self, *args, **kwargs): missingassets = [] availassets = [] allsds = [] - + versions = {} + # Default sensor for products sensor = 'MCD' @@ -303,6 +321,8 @@ def process(self, *args, **kwargs): else: availassets.append(asset) allsds.extend(sds) + versions[asset] = int(re.findall('M.*\.00(\d)\.\d{13}\.hdf', sds[0])[0]) + if not availassets: # some products aren't available for every day but this is trying every day VerboseOut('There are no available assets (%s) on %s for tile %s' @@ -312,7 +332,6 @@ def process(self, *args, **kwargs): meta = self.meta_dict() meta['AVAILABLE_ASSETS'] = ' '.join(availassets) - if val[0] == "landcover": fname = '%s_%s_%s.tif' % (bname, sensor, key) if os.path.lexists(fname): @@ -322,6 +341,8 @@ def process(self, *args, **kwargs): if val[0] == "refl": + if versions[asset] != 6: + raise Exception('product version not supported') fname = '%s_%s_%s.tif' % (bname, sensor, key) img = gippy.GeoImage(sds[7:]) nodata = img[0].NoDataValue() @@ -337,6 +358,8 @@ def process(self, *args, **kwargs): if val[0] == "quality": + if versions[asset] != 6: + raise Exception('product version not supported') fname = '%s_%s_%s.tif' % (bname, sensor, key) img = gippy.GeoImage(sds[:7]) nodata = img[0].NoDataValue() @@ -353,22 +376,29 @@ def process(self, *args, **kwargs): # LAND VEGETATION INDICES PRODUCT if val[0] == "indices": - VERSION = "2.0" meta['VERSION'] = VERSION sensor = 'MCD' fname = '%s_%s_%s' % (bname, sensor, key) - refl = gippy.GeoImage(allsds) - missing = 32767 - redimg = refl[7].Read() - nirimg = refl[8].Read() - bluimg = refl[9].Read() - grnimg = refl[10].Read() - mirimg = refl[11].Read() - swrimg = refl[12].Read() # formerly swir2 + if versions[asset] == 6: + redimg = refl[7].Read() + nirimg = refl[8].Read() + bluimg = refl[9].Read() + grnimg = refl[10].Read() + mirimg = refl[11].Read() + swrimg = refl[12].Read() # formerly swir2 + elif versions[asset] == 5: + redimg = refl[0].Read() + nirimg = refl[1].Read() + bluimg = refl[2].Read() + grnimg = refl[3].Read() + mirimg = refl[4].Read() + swrimg = refl[5].Read() # formerly swir2 + else: + raise Exception('product version not supported') redimg[redimg < 0.0] = 0.0 nirimg[nirimg < 0.0] = 0.0 diff --git a/gips/inventory.py b/gips/inventory.py index f5a1abb2..062aeb75 100644 --- a/gips/inventory.py +++ b/gips/inventory.py @@ -34,6 +34,8 @@ from gips.data.core import Data from gips.mapreduce import MapReduce +from pdb import set_trace + class Inventory(object): """ Base class for inventories """ @@ -234,7 +236,7 @@ def map_reduce(self, func, numbands=1, products=None, readfunc=None, nchunks=100 class DataInventory(Inventory): """ Manager class for data inventories (collection of Tiles class) """ - def __init__(self, dataclass, spatial, temporal, products=None, fetch=False, **kwargs): + def __init__(self, dataclass, spatial, temporal, products=None, fetch=False, update=False, **kwargs): """ Create a new inventory :dataclass: The Data class to use (e.g., LandsatData, ModisData) :spatial: The spatial extent requested @@ -250,12 +252,16 @@ def __init__(self, dataclass, spatial, temporal, products=None, fetch=False, **k self.temporal = temporal self.products = dataclass.RequestedProducts(products) + self.update = update + + print "datainventory" + if fetch: try: - dataclass.fetch(self.products.base, self.spatial.tiles, self.temporal) + dataclass.fetch(self.products.base, self.spatial.tiles, self.temporal, self.update) except Exception, e: raise Exception('Error downloading %s: %s' % (dataclass.name, e)) - dataclass.Asset.archive(Repository.path('stage')) + dataclass.Asset.archive(Repository.path('stage'), update=self.update) # find data self.data = {} diff --git a/gips/parsers.py b/gips/parsers.py index 270ab12e..a05cf96c 100644 --- a/gips/parsers.py +++ b/gips/parsers.py @@ -81,6 +81,8 @@ def add_inventory_parser(self, site_required=False): group.add_argument('--%cov', dest='pcov', help='Threshold of %% tile coverage over site', default=0, type=int) group.add_argument('--%tile', dest='ptile', help='Threshold of %% tile used', default=0, type=int) group.add_argument('--fetch', help='Fetch any missing data (if supported)', default=False, action='store_true') + group.add_argument('--update', help='Force fetch and/ or update data (if supported)', default=False, action='store_true') + group.add_argument('-v', '--verbose', help='Verbosity - 0: quiet, 1: normal, 2: debug', default=1, type=int) group.add_argument('-p', '--products', help='Requested Products', nargs='*') self.parent_parsers.append(parser) diff --git a/gips/scripts/inventory.py b/gips/scripts/inventory.py index 59449501..e42257f1 100644 --- a/gips/scripts/inventory.py +++ b/gips/scripts/inventory.py @@ -27,6 +27,7 @@ from gips.utils import Colors, VerboseOut, open_vector, import_data_class from gips.inventory import DataInventory +from pdb import set_trace def main(): title = Colors.BOLD + 'GIPS Data Inventory (v%s)' % gipsversion + Colors.OFF @@ -44,6 +45,7 @@ def main(): extents = SpatialExtent.factory(cls, args.site, args.key, args.where, args.tiles, args.pcov, args.ptile) + for extent in extents: inv = DataInventory(cls, extent, TemporalExtent(args.dates, args.days), **vars(args)) inv.pprint(md=args.md)