Skip to content

Commit

Permalink
Merge pull request #18 from usnistgov/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
knc6 authored Jan 28, 2023
2 parents a58bf84 + 13cbf1c commit 567f8a4
Show file tree
Hide file tree
Showing 5 changed files with 354 additions and 7 deletions.
2 changes: 1 addition & 1 deletion atomvision/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""Version number."""
__version__ = "2022.11.19"
__version__ = "2023.1.27"
43 changes: 40 additions & 3 deletions atomvision/scripts/train_autoencoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import sys
import random
import argparse

# from jarvis.db.jsonutils import dumpjson

random_seed = 123
Expand All @@ -24,9 +25,11 @@
class AE(nn.Module):
"""Module for auto-encoder."""

def __init__(self, input_shape=50176, feats=1120):
def __init__(self, input_shape=50176, feats=128):
# def __init__(self, input_shape=50176, feats=1120):
# def __init__(self, input_shape=50176,feats=448):
"""Initialize class."""
print("Using feature size", feats)
super().__init__()
self.encoder_hidden_layer = nn.Linear(
in_features=input_shape, out_features=feats
Expand Down Expand Up @@ -75,6 +78,12 @@ def forward(self, features):
help="Input size e.g 224x224."
# "--input_size", default=784, help="Input size e.g 224x224."
)
parser.add_argument(
"--feat_size",
default=1120,
help="latent dim size e.g 128."
# "--input_size", default=784, help="Input size e.g 224x224."
)
parser.add_argument("--epochs", default=200, help="Number of epochs.")


Expand All @@ -89,6 +98,7 @@ def forward(self, features):
args = parser.parse_args(sys.argv[1:])
epochs = int(args.epochs)
input_size = int(args.input_size)
feat_size = int(args.feat_size)
batch_size = int(args.batch_size)

output_dir = args.output_dir
Expand All @@ -97,7 +107,7 @@ def forward(self, features):

# create a model from `AE` autoencoder class
# load it to the specified device, either gpu or cpu
model = AE(input_shape=input_size).to(device)
model = AE(input_shape=input_size, feats=feat_size).to(device)

# create an optimizer object
# Adam optimizer with learning rate 1e-3
Expand Down Expand Up @@ -165,7 +175,34 @@ def forward(self, features):
loss = loss / len(train_loader)

# display the epoch training loss
print("epoch : {}/{}, loss = {:.6f}".format(epoch + 1, epochs, loss))

torch.save(model.state_dict(), "ae.pt")

val_loss = 0
with torch.no_grad():
for batch_features, _ in test_loader:
optimizer.zero_grad()
# reshape mini-batch data to [N, 784] matrix
# load it to the active device
batch_features = batch_features.view(-1, input_size).to(device)
# batch_features = batch_features.view(-1, 784).to(device)

# reset the gradients back to zero
# PyTorch accumulates gradients on subsequent backward passes

# compute reconstructions
outputs = model(batch_features)

# compute training reconstruction loss
v_loss = criterion(outputs, batch_features)

# add the mini-batch training loss to epoch loss
val_loss += v_loss.item()

# compute the epoch training loss
val_loss = val_loss / len(test_loader)
# print("epoch : {}/{}, loss = {:.6f}".format(epoch + 1, epochs, loss))
print("Epoch, train_loss, val_loss", epoch + 1, loss, val_loss)

test_examples = None
with torch.no_grad():
Expand Down
120 changes: 118 additions & 2 deletions atomvision/scripts/train_tsne.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@
import matplotlib.pyplot as plt
from PIL import ImageFile

from atomvision.scripts.image_to_graph import (
crop_image,
get_blob_positions,
blob_list_to_graph,
)

# from matplotlib.offsetbox import OffsetImage, AnnotationBbox
# import torchvision.transforms as transforms
# https://raw.githubusercontent.com/GunhoChoi/PyTorch-FastCampus/master/07_Transfer_Learning/2_T-SNE/color_tsne.py
Expand All @@ -22,7 +28,7 @@ def train_tsne(
perplexity=30,
filename=None,
):

"""Get T-SNE clustering."""
# transform = transforms.Compose(
# [
# transforms.ToTensor(),
Expand Down Expand Up @@ -114,8 +120,118 @@ def train_tsne(
help="Folder with training images. Each class should have its own folder.",
)


def labelled_images_to_graphs(images, labels, border_pxl=0, saveto=""):
    """Convert labelled images to (graph, line graph) pairs.

    Args:
        images: iterable of 2D image arrays.
        labels: iterable of class labels paired with ``images``;
            only used to pair with images during iteration.
        border_pxl: number of border pixels to crop from each image
            before blob detection; 0 means no cropping.
        saveto: unused; kept for backward compatibility.

    Returns:
        Tuple ``(graphs, line_graphs)`` — one graph and one line graph
        per input image, as produced by ``blob_list_to_graph``.
    """
    graphs = []
    line_graphs = []
    # NOTE(review): labels are only consumed by zip pairing here; the
    # label value itself is not attached to the graph.
    for img, _label in zip(images, labels):
        if border_pxl != 0:
            img = crop_image(img, border_pxl)
        blob_list = get_blob_positions(img)
        g, lg = blob_list_to_graph(img, blob_list)
        graphs.append(g)
        line_graphs.append(lg)
    return graphs, line_graphs


def train_tsne_graph(
    data_dir="New_stem_2d/train_folder",
    image_size=256,
    border_pxl=50,
    perplexity=30,
    filename=None,
):
    """Train T-SNE on graph (edge-angle histogram) features.

    Loads grayscale images from an ImageFolder layout, converts each
    image to a graph, histograms the line-graph edge features, embeds
    the histograms with 2D T-SNE, and scatter-plots the result colored
    by class label.

    Args:
        data_dir: folder with one sub-folder per class (ImageFolder layout).
        image_size: images are resized to (image_size, image_size).
        border_pxl: border pixels cropped before blob detection.
            Default is 50 to match the previously hard-coded value.
        perplexity: T-SNE perplexity.
        filename: if None, show the plot interactively; otherwise save
            the figure to this path.
    """
    data = dset.ImageFolder(data_dir)

    image_arr = []
    label_arr = []

    for image_path, label in data.imgs:
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

        # Resize then transpose; .T swaps the x/y axes of the 2D array.
        image2 = cv2.resize(
            image, [image_size, image_size], interpolation=cv2.INTER_AREA
        ).T

        # No per-image normalization is applied (divisor fixed at 1);
        # dividing still promotes the uint8 array to float.
        max_val = 1  # np.max(image2)
        i = image2 / max_val

        image_arr.append(i)
        label_arr.append(label)

    # Diagnostics: save and print the first image for sanity checking.
    plt.imshow(image_arr[0])
    plt.savefig("first_image.pdf", bbox_inches="tight")
    print("first image", image_arr[0])
    print("image_dim", image_arr[0].shape)
    print("image_arr len", len(image_arr))
    print("\n------Starting Graph Generation------\n")

    # BUGFIX: previously border_pxl=50 was hard-coded here, silently
    # ignoring the function parameter (whose default was 0). Pass the
    # parameter through; the default above is 50 so existing callers
    # see identical behavior.
    g_arr, lg_arr = labelled_images_to_graphs(
        np.array(image_arr), label_arr, border_pxl=border_pxl
    )

    # Feature vector per image: 200-bin histogram of line-graph edge
    # data in [-1, 1] (presumably bond-angle cosines — TODO confirm).
    edges = []
    for lg in lg_arr:
        angles = lg.edata["h"].numpy()
        hist, bin_edges = np.histogram(angles, 200, (-1, 1))
        edges.append(hist)
    print("\n------Starting TSNE------\n")

    # NOTE(review): init="warn" is a scikit-learn transitional sentinel,
    # not a real init mode — confirm against the installed sklearn
    # version before changing.
    model = TSNE(
        n_components=2, perplexity=perplexity, random_state=1, init="warn"
    )

    result = model.fit_transform(np.array(edges))

    print("\n------TSNE Done------\n")
    X_embedded = result

    plt.rcParams.update({"font.size": 14})
    plt.figure(figsize=(8, 8))
    X = X_embedded
    x = X[:, 0]
    y = X[:, 1]

    # Map raw labels to dense indices so colors/legend entries align.
    term_list = list(np.array(label_arr))
    term_set = list(set(term_list))
    term_list = [term_set.index(term) for term in term_list]

    color_list = plt.cm.tab10(term_list)

    # Scatter one point per class first so each class appears exactly
    # once in the legend, then plot all points.
    lbls = []
    xyz = []
    for i, j, k, p in zip(x, y, term_list, color_list):
        if k not in lbls:
            lbls.append(k)
            xyz.append([i, j, k])
            plt.scatter(i, j, s=10, c=p, label=term_set[k])

    plt.scatter(x, y, s=10, c=color_list)
    plt.legend(loc="lower left")

    plt.xticks([])
    plt.yticks([])
    if filename is None:
        plt.show()
    else:
        plt.savefig(filename)
        plt.close()


if __name__ == "__main__":
    # Parse CLI arguments, then run both T-SNE variants (pixel-based
    # and graph-feature-based) on the same image folder.
    cli_args = parser.parse_args(sys.argv[1:])
    folder = str(cli_args.data_dir)

    train_tsne(data_dir=folder, filename="tsne.pdf")
    train_tsne_graph(data_dir=folder, filename="tsne_graph.pdf")
Loading

0 comments on commit 567f8a4

Please sign in to comment.