Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

L7 Irish: add new dataset/datamodule #1197

Merged
merged 60 commits into from
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
60 commits
Select commit Hold shift + click to select a range
3871588
Landsat 7 Irish: add new dataset/datamodule
yichiac Mar 26, 2023
1211d33
# Changes to be committed:
yichiac Mar 28, 2023
9963dba
# Changes to be committed:
yichiac Mar 28, 2023
9e22c85
add: test_l7irish.py and data.py
yichiac Apr 1, 2023
ad8bfa8
# Changes to be committed:
yichiac Apr 1, 2023
ba3c8c6
modified: tests/datasets/test_l7irish.py,
yichiac Apr 1, 2023
63d29a3
added data.py, austral.tar.gz, test_l7irish.py
yichiac Apr 4, 2023
de91df1
remove comments in test_l7irish.py
yichiac Apr 4, 2023
6d65ec7
Merge branch 'main' into main
yichiac Apr 4, 2023
56dce86
resolve black and flake8 issues
yichiac Apr 4, 2023
00e91f9
Fixed _getitem
yichiac Apr 4, 2023
0b54298
Added L7 Irish datamodule
yichiac Apr 5, 2023
6f0c2b6
fix flake8 space error
yichiac Apr 5, 2023
fc9c788
fix black test error
yichiac Apr 5, 2023
ca2a826
chmod +x for data.py
yichiac Apr 5, 2023
094adf9
Update docs/api/datamodules.rst
yichiac Apr 5, 2023
12b699e
Update docs/api/datasets.rst
yichiac Apr 5, 2023
29bff42
Update docs/api/geo_datasets.csv
yichiac Apr 5, 2023
c6b78d9
Update torchgeo/datasets/l7irish.py
yichiac Apr 5, 2023
dd48635
Update torchgeo/datasets/l7irish.py
yichiac Apr 5, 2023
7064617
Resolved minor issues in l7irish.py
yichiac Apr 5, 2023
c839725
Improved _getitem and plot functions
yichiac Apr 5, 2023
9884f05
Added new artificial data with 5 scenes
yichiac Apr 5, 2023
2065e20
remove comments in l7irish.py
yichiac Apr 5, 2023
f6a67e0
Merge branch 'main' into datasets/l7irish
yichiac Apr 5, 2023
6394729
resolve black, flake8, and isort errors
yichiac Apr 5, 2023
0ca2043
add l7irish.yaml and refine test_segmentation.py
yichiac Apr 5, 2023
0c12781
modified l7irish.yaml
yichiac Apr 5, 2023
b81aa19
revert a change in .gitignore
yichiac Apr 5, 2023
218a776
add function test_rgb_bands_absent_plot()
yichiac Apr 6, 2023
222c8a9
resolve black test issue
yichiac Apr 6, 2023
30ab20f
Update torchgeo/datasets/l7irish.py
yichiac Apr 6, 2023
fd036d4
Update torchgeo/datasets/l7irish.py
yichiac Apr 6, 2023
61eeba5
Merge branch 'main' into datasets/l7irish
yichiac Apr 7, 2023
ce01caa
Update l7irish.py and create new test data
yichiac Apr 7, 2023
cbc7691
update l7irish.py for style tests
yichiac Apr 7, 2023
d4f069e
remove old test data
yichiac Apr 7, 2023
56473e9
Update tests/data/l7irish/data.py
yichiac Apr 7, 2023
a93ac58
Update torchgeo/datasets/l7irish.py
yichiac Apr 7, 2023
9b6c868
update data.py and l7irish.py
yichiac Apr 7, 2023
340d8b6
update md5s, citations, masks, and thermal bands
yichiac Apr 11, 2023
8cb6480
update mask mapping
yichiac Apr 11, 2023
4bfa372
update formatting
yichiac Apr 11, 2023
b4e5d17
update mask path
yichiac Apr 11, 2023
eb78c37
Merge branch 'main' into datasets/l7irish
yichiac Apr 11, 2023
e23ae78
Merge branch 'main' into datasets/l7irish
yichiac Apr 11, 2023
d4fa226
Merge branch 'main' into datasets/l7irish
yichiac Apr 11, 2023
65ea08c
Merge branch 'main' into datasets/l7irish
yichiac Apr 12, 2023
6b38751
Update torchgeo/datasets/l7irish.py
yichiac Apr 12, 2023
d7806ba
Update torchgeo/datasets/l7irish.py
yichiac Apr 12, 2023
64353ce
Update tests/data/l7irish/data.py
yichiac Apr 12, 2023
50db19d
Update docs/api/geo_datasets.csv
yichiac Apr 12, 2023
9dba933
Update tests/conf/l7irish.yaml
yichiac Apr 12, 2023
cfa20d3
resolve issues from comments
yichiac Apr 12, 2023
79cc501
Merge branch 'main' into datasets/l7irish
yichiac Apr 12, 2023
fe42927
Update L7 Irish link
yichiac Apr 12, 2023
01e1e80
Merge branch 'main' into datasets/l7irish
yichiac Apr 12, 2023
11149db
update mask data generation and review changes
yichiac Apr 12, 2023
796a607
Merge branch 'main' into datasets/l7irish
yichiac Apr 12, 2023
1107eaf
Fix checksums
adamjstewart Apr 12, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
yichiac marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,4 @@ dmypy.json

# Pyre type checker
.pyre/
.DS_Store
yichiac marked this conversation as resolved.
Show resolved Hide resolved
251 changes: 251 additions & 0 deletions torchgeo/datasets/l7irish.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

"""Landsat 7 Cloud Cover Assessment Validation Data"""
import abc
import glob
import hashlib
import os
from functools import lru_cache
from typing import Any, Callable, Dict, List, Optional, cast

import matplotlib.pyplot as plt
import numpy as np
import torch
from matplotlib.colors import ListedColormap
from PIL import Image
from rasterio.crs import CRS
from torch import Tensor
from torch.utils.data import Dataset

from .geo import NonGeoDataset, RasterDataset
from .landsat import Landsat7
from .utils import BoundingBox, download_url, extract_archive, working_dir


class L7Irish(RasterDataset):
    """L7 Irish dataset.

    The `L7 Irish
    <https://landsat.usgs.gov/landsat-7-cloud-cover-assessment-validation-data>`__
    dataset is based on the Landsat 7 Cloud Cover Assessment Validation Data.

    Dataset format:

    * Level-1G
    * raw mask values are [0, 64, 128, 192, 255]; :meth:`__getitem__` returns
      them as class indices [0, 1, 2, 3, 4]

    Dataset classes:

    * Fill
    * Cloud Shadow
    * Clear
    * Thin Cloud
    * Cloud

    If you use this dataset in your research, please cite the following papers:

    * https://doi.org/10.5066/F7XD0ZWC
    * https://doi.org/10.1109/TGRS.2011.2164087
    """

    # NOTE(review): the original author flagged this pattern as "need to change";
    # verify it against the actual L7 Irish file names before relying on it.
    filename_regex = r"""
        ^L
        (?P<sensor>[COTEM])
        (?P<satellite>\d{2})
        _(?P<processing_correction_level>[A-Z0-9]{4})
        _(?P<wrs_path>\d{3})
        (?P<wrs_row>\d{3})
        _(?P<date>\d{8})
        _(?P<processing_date>\d{8})
        _(?P<collection_number>\d{2})
        _(?P<collection_category>[A-Z0-9]{2})
        _(?P<band>[A-Z0-9_]+)
        \.
    """

    # Glob (relative to ``root``) matching the downloaded per-biome archives.
    tarfile_glob = "*.tar.gz"

    # Download URL template; ``{}`` is replaced by the biome name.
    url = "https://huggingface.co/datasets/torchgeo/l7irish/resolve/main/{}.tar.gz"

    # (biome name, MD5 of ``<biome>.tar.gz``) pairs.
    md5s = [
        ("austral", "dbb6b5628f50861b9b89f548d25a925f"),
        ("boreal", "cecc72de09aacde4c4f8d7f0cf0d3f6f"),
        ("mid_latitude_north", "0f8382ca6554fb7cf9aff42226a14f9d"),
        ("mid_latitude_south", "b17cf6d023f752c533211fdb742f296b"),
        ("polar_north", "73923dcaf1b9b79bad82de1aa0740d1e"),
        ("polar_south", "3bc9f4c6f8955b10b4d55d23e0ab2da7"),
        ("subtropical_north", "f8f039970256902e6e9ebd6747589294"),
        ("subtropical_south", "8346d73a983396c5d41b577c3a94bc26"),
        ("tropical", "abe19b22b5d031e6b609cc7207706c3d"),
    ]

    classes = ["Fill", "Cloud Shadow", "Clear", "Thin Cloud", "Cloud"]

    # Raw mask value -> RGB colour (0-255) used when plotting.
    cmap = {
        0: (0, 0, 0),
        64: (64, 64, 64),
        128: (128, 128, 128),
        192: (192, 192, 192),
        255: (255, 255, 255),
    }

    def __init__(
        self,
        root: str = "data",
        download: bool = False,
        checksum: bool = False,
        *,
        transforms: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
        cache: bool = True,
    ) -> None:
        """Initialize a new Landsat 7 Cloud Cover Assessment Validation instance.

        Args:
            root: root directory where dataset can be found
            download: if True, download dataset and store it in the root directory
            checksum: if True, check the MD5 of the downloaded files (may be slow)
            transforms: a function/transform that takes an input sample and
                returns a transformed version
            cache: if True, cache file handle to speed up repeated sampling

        Raises:
            RuntimeError: if ``download=False`` and data is not found, or checksums
                don't match
        """
        self.root = root
        self.download = download
        self.checksum = checksum

        # Build the plotting colormap on the instance. The class-level ``cmap``
        # is deliberately left untouched: rewriting its keys in place would
        # corrupt it for every later instantiation.
        class_colors = {
            int(np.ceil(value / 64)): color for value, color in self.cmap.items()
        }
        lc_colors = np.zeros((max(class_colors) + 1, 3))
        lc_colors[list(class_colors)] = list(class_colors.values())
        self._lc_cmap = ListedColormap(lc_colors / 255)

        # The files must be on disk before the parent class indexes ``root``.
        self._verify()

        # ``download``/``checksum`` are handled above and are not parent
        # parameters, so they must not be forwarded positionally.
        super().__init__(root, transforms=transforms, cache=cache)

    def _verify(self) -> None:
        """Verify the integrity of the dataset.

        Raises:
            RuntimeError: if ``download=False`` but dataset is missing or checksum fails
        """
        # Check if the extracted files already exist. The archives themselves
        # can match ``filename_glob`` too, so they are not counted as data.
        pathname = os.path.join(self.root, "**", self.filename_glob)
        if any(
            not fname.endswith(".tar.gz")
            for fname in glob.iglob(pathname, recursive=True)
        ):
            return

        # Check if the tarballs have already been downloaded
        pathname = os.path.join(self.root, self.tarfile_glob)
        if glob.glob(pathname):
            self._extract()
            return

        # Check if the user requested to download the dataset
        if not self.download:
            raise RuntimeError(
                f"Dataset not found in `root={self.root}` and `download=False`, "
                "either specify a different `root` directory or use `download=True` "
                "to automatically download the dataset."
            )

        # Download the dataset
        self._download()
        self._extract()

    def _download(self) -> None:
        """Download one archive per biome into ``root``."""
        for biome, md5 in self.md5s:
            download_url(
                self.url.format(biome), self.root, md5=md5 if self.checksum else None
            )

    def _extract(self) -> None:
        """Extract every downloaded archive found directly under ``root``."""
        pathname = os.path.join(self.root, self.tarfile_glob)
        for archive in glob.iglob(pathname):
            extract_archive(archive)

    def __getitem__(self, query: BoundingBox) -> Dict[str, Any]:
        """Retrieve image/mask and metadata indexed by query.

        Args:
            query: (minx, maxx, miny, maxy, mint, maxt) coordinates to index

        Returns:
            sample of image, mask and metadata at that index

        Raises:
            IndexError: if query is not found in the index
        """
        hits = self.index.intersection(tuple(query), objects=True)
        img_filepaths = cast(List[str], [hit.object for hit in hits])

        if not img_filepaths:
            raise IndexError(
                f"query: {query} not found in index with bounds: {self.bounds}"
            )

        # NOTE(review): ``replace("", "")`` is a no-op, so the mask is currently
        # read from the image files themselves. The intended image -> mask path
        # mapping is not visible here (an earlier draft used
        # ``replace("images", "masks")``) - TODO confirm the on-disk layout.
        mask_filepaths = [path.replace("", "") for path in img_filepaths]

        img = self._merge_files(img_filepaths, query, self.band_indexes)
        # NOTE(review): the image band indexes are reused for the mask; confirm
        # this is intended once the mask paths point at real mask files.
        mask = self._merge_files(mask_filepaths, query, self.band_indexes)

        # Convert raw mask values 0, 64, 128, 192, 255 to class indices
        # 0, 1, 2, 3, 4 (255 / 64 rounds up to 4).
        mask = torch.ceil(mask.float() / 64).long()

        sample = {
            "crs": self.crs,
            "bbox": query,
            "image": img.float(),
            "mask": mask,
        }

        if self.transforms is not None:
            sample = self.transforms(sample)

        return sample

    def plot(
        self,
        sample: Dict[str, Tensor],
        show_titles: bool = True,
        suptitle: Optional[str] = None,
    ) -> plt.Figure:
        """Plot a sample from the dataset.

        Args:
            sample: a sample returned by :meth:`__getitem__`
            show_titles: flag indicating whether to show titles above each panel
            suptitle: optional string to use as a suptitle

        Returns:
            a matplotlib Figure with the rendered sample
        """
        # Move the leading band axis to the end for ``imshow``.
        image = np.rollaxis(sample["image"].numpy().astype("uint8").squeeze(), 0, 3)
        mask = sample["mask"].numpy().astype("uint8").squeeze()

        num_panels = 2
        showing_predictions = "prediction" in sample
        if showing_predictions:
            # Squeezed like the mask so a singleton leading axis does not
            # break ``imshow``.
            predictions = sample["prediction"].numpy().squeeze()
            num_panels += 1

        fig, axs = plt.subplots(1, num_panels, figsize=(num_panels * 4, 5))
        axs[0].imshow(image)
        axs[0].axis("off")
        # vmin/vmax pin the five class indices to the five colormap entries.
        axs[1].imshow(
            mask, vmin=0, vmax=4, cmap=self._lc_cmap, interpolation="none"
        )
        axs[1].axis("off")
        if show_titles:
            axs[0].set_title("Image")
            axs[1].set_title("Mask")

        if showing_predictions:
            axs[2].imshow(
                predictions, vmin=0, vmax=4, cmap=self._lc_cmap, interpolation="none"
            )
            axs[2].axis("off")
            if show_titles:
                axs[2].set_title("Predictions")

        if suptitle is not None:
            plt.suptitle(suptitle)
        return fig