-
Notifications
You must be signed in to change notification settings - Fork 75
/
Copy pathp2_dataload.py
61 lines (49 loc) · 2.09 KB
/
p2_dataload.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from torch.utils.data.dataset import Dataset
from torchvision import transforms
import pandas as pd
import os
from PIL import Image # Replace by accimage when ready
from PIL.Image import FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM, ROTATE_90, ROTATE_180, ROTATE_270
from PIL.ImageEnhance import Color, Contrast, Brightness, Sharpness
from sklearn.preprocessing import MultiLabelBinarizer
from torch import np, from_numpy # Numpy like wrapper
class KaggleAmazonDataset(Dataset):
    """Dataset wrapping images and target labels for Kaggle - Planet Amazon from Space competition.

    Arguments:
        csv_path: A CSV file path. The file must provide an ``image_name``
            column and a ``tags`` column of whitespace-separated labels.
        img_path: Path to image folder.
        img_ext: Extension of images, appended verbatim to every image name
            (so it has to carry its own leading dot, e.g. ``'.jpg'``).
        transform: Optional callable applied to each RGB PIL image before it
            is returned.

    Raises:
        AssertionError: If an image referenced in the CSV file is not found.
    """

    def __init__(self, csv_path, img_path, img_ext, transform=None):
        self.img_path = img_path
        self.img_ext = img_ext
        self.transform = transform

        self.df = pd.read_csv(csv_path)

        # Raised explicitly rather than through an ``assert`` statement so the
        # check still runs under ``python -O``; the exception type and message
        # are the ones callers already see.
        if not all(os.path.isfile(self._image_file(name))
                   for name in self.df['image_name']):
            raise AssertionError(
                "Some images referenced in the CSV file were not found")

        # Ordering weather first
        self.mlb = MultiLabelBinarizer(
            classes=[
                'clear', 'cloudy', 'haze', 'partly_cloudy',
                'agriculture', 'artisinal_mine', 'bare_ground', 'blooming',
                'blow_down', 'conventional_mine', 'cultivation', 'habitation',
                'primary', 'road', 'selective_logging', 'slash_burn', 'water',
            ]
        )

        # ``X`` is a plain attribute holding the image names. A method with
        # the same name used to be defined on the class, but this assignment
        # always shadowed it, so it could never be called on an instance.
        self.X = self.df['image_name']
        # One float32 indicator row per CSV row, columns in ``classes`` order.
        self.y = self.mlb.fit_transform(
            self.df['tags'].str.split()).astype(np.float32)

    def _image_file(self, image_name):
        """Return the path of the image file for ``image_name``."""
        # os.path.join copes with ``img_path`` given with or without a
        # trailing separator.
        return os.path.join(self.img_path, image_name + self.img_ext)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at position ``index``.

        The image is opened, converted to RGB and passed through
        ``transform`` when one was supplied; the label is the matching row of
        ``y`` wrapped as a tensor.

        Raises:
            IndexError: If ``index`` is out of range.
        """
        # Positional lookup, consistent with ``self.y[index]`` below. An
        # out-of-range position raises IndexError, which is also what plain
        # ``for item in dataset`` iteration relies on to stop.
        image_name = self.X.iloc[index]

        # convert() loads the pixel data into a new image, so the file can be
        # closed as soon as the ``with`` block ends.
        with Image.open(self._image_file(image_name)) as raw:
            img = raw.convert('RGB')

        if self.transform is not None:
            img = self.transform(img)

        label = from_numpy(self.y[index])
        return img, label

    def __len__(self):
        """Return the number of rows read from the CSV file."""
        return len(self.df.index)

    def getLabelEncoder(self):
        """Return the MultiLabelBinarizer used to encode the tags."""
        return self.mlb

    def getDF(self):
        """Return the DataFrame read from the CSV file."""
        return self.df