Skip to content

Commit

Permalink
Merge pull request #18 from usnistgov/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
knc6 authored Jan 28, 2023
2 parents a58bf84 + 13cbf1c commit 567f8a4
Show file tree
Hide file tree
Showing 5 changed files with 354 additions and 7 deletions.
2 changes: 1 addition & 1 deletion atomvision/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
"""Version number."""
__version__ = "2022.11.19"
__version__ = "2023.1.27"
43 changes: 40 additions & 3 deletions atomvision/scripts/train_autoencoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import sys
import random
import argparse

# from jarvis.db.jsonutils import dumpjson

random_seed = 123
Expand All @@ -24,9 +25,11 @@
class AE(nn.Module):
"""Module for auto-encoder."""

def __init__(self, input_shape=50176, feats=1120):
def __init__(self, input_shape=50176, feats=128):
# def __init__(self, input_shape=50176, feats=1120):
# def __init__(self, input_shape=50176,feats=448):
"""Initialize class."""
print("Using feature size", feats)
super().__init__()
self.encoder_hidden_layer = nn.Linear(
in_features=input_shape, out_features=feats
Expand Down Expand Up @@ -75,6 +78,12 @@ def forward(self, features):
help="Input size e.g 224x224."
# "--input_size", default=784, help="Input size e.g 224x224."
)
parser.add_argument(
"--feat_size",
default=1120,
help="latent dim size e.g 128."
# "--input_size", default=784, help="Input size e.g 224x224."
)
parser.add_argument("--epochs", default=200, help="Number of epochs.")


Expand All @@ -89,6 +98,7 @@ def forward(self, features):
args = parser.parse_args(sys.argv[1:])
epochs = int(args.epochs)
input_size = int(args.input_size)
feat_size = int(args.feat_size)
batch_size = int(args.batch_size)

output_dir = args.output_dir
Expand All @@ -97,7 +107,7 @@ def forward(self, features):

# create a model from `AE` autoencoder class
# load it to the specified device, either gpu or cpu
model = AE(input_shape=input_size).to(device)
model = AE(input_shape=input_size, feats=feat_size).to(device)

# create an optimizer object
# Adam optimizer with learning rate 1e-3
Expand Down Expand Up @@ -165,7 +175,34 @@ def forward(self, features):
loss = loss / len(train_loader)

# display the epoch training loss
print("epoch : {}/{}, loss = {:.6f}".format(epoch + 1, epochs, loss))

torch.save(model.state_dict(), "ae.pt")

val_loss = 0
with torch.no_grad():
for batch_features, _ in test_loader:
optimizer.zero_grad()
# reshape mini-batch data to [N, 784] matrix
# load it to the active device
batch_features = batch_features.view(-1, input_size).to(device)
# batch_features = batch_features.view(-1, 784).to(device)

# reset the gradients back to zero
# PyTorch accumulates gradients on subsequent backward passes

# compute reconstructions
outputs = model(batch_features)

# compute training reconstruction loss
v_loss = criterion(outputs, batch_features)

# add the mini-batch training loss to epoch loss
val_loss += v_loss.item()

# compute the epoch training loss
val_loss = val_loss / len(test_loader)
# print("epoch : {}/{}, loss = {:.6f}".format(epoch + 1, epochs, loss))
print("Epoch, train_loss, val_loss", epoch + 1, loss, val_loss)

test_examples = None
with torch.no_grad():
Expand Down
120 changes: 118 additions & 2 deletions atomvision/scripts/train_tsne.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@
import matplotlib.pyplot as plt
from PIL import ImageFile

from atomvision.scripts.image_to_graph import (
crop_image,
get_blob_positions,
blob_list_to_graph,
)

# from matplotlib.offsetbox import OffsetImage, AnnotationBbox
# import torchvision.transforms as transforms
# https://raw.githubusercontent.com/GunhoChoi/PyTorch-FastCampus/master/07_Transfer_Learning/2_T-SNE/color_tsne.py
Expand All @@ -22,7 +28,7 @@ def train_tsne(
perplexity=30,
filename=None,
):

"""Get T-SNE clustering."""
# transform = transforms.Compose(
# [
# transforms.ToTensor(),
Expand Down Expand Up @@ -114,8 +120,118 @@ def train_tsne(
help="Folder with training images. Each class should have its own folder.",
)


def labelled_images_to_graphs(images, labels, border_pxl=0, saveto=""):
    """Convert labelled images to (graph, line graph) pairs.

    Args:
        images: iterable of 2D image arrays.
        labels: iterable of class labels paired with ``images``;
            only used to pair with images during iteration.
        border_pxl: number of border pixels to crop from each image
            before blob detection; 0 means no cropping.
        saveto: unused; kept for backward compatibility.

    Returns:
        Tuple ``(graphs, line_graphs)`` — one graph and one line graph
        per input image, as produced by ``blob_list_to_graph``.
    """
    graphs = []
    line_graphs = []
    # NOTE(review): labels are only consumed by zip pairing here; the
    # label value itself is not attached to the graph.
    for img, _label in zip(images, labels):
        if border_pxl != 0:
            img = crop_image(img, border_pxl)
        blob_list = get_blob_positions(img)
        g, lg = blob_list_to_graph(img, blob_list)
        graphs.append(g)
        line_graphs.append(lg)
    return graphs, line_graphs


def train_tsne_graph(
    data_dir="New_stem_2d/train_folder",
    image_size=256,
    border_pxl=50,
    perplexity=30,
    filename=None,
):
    """Train T-SNE on graph (edge-angle histogram) features.

    Loads grayscale images from an ImageFolder layout, converts each
    image to a graph, histograms the line-graph edge features, embeds
    the histograms with 2D T-SNE, and scatter-plots the result colored
    by class label.

    Args:
        data_dir: folder with one sub-folder per class (ImageFolder layout).
        image_size: images are resized to (image_size, image_size).
        border_pxl: border pixels cropped before blob detection.
            Default is 50 to match the previously hard-coded value.
        perplexity: T-SNE perplexity.
        filename: if None, show the plot interactively; otherwise save
            the figure to this path.
    """
    data = dset.ImageFolder(data_dir)

    image_arr = []
    label_arr = []

    for image_path, label in data.imgs:
        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

        # Resize then transpose; .T swaps the x/y axes of the 2D array.
        image2 = cv2.resize(
            image, [image_size, image_size], interpolation=cv2.INTER_AREA
        ).T

        # No per-image normalization is applied (divisor fixed at 1);
        # dividing still promotes the uint8 array to float.
        max_val = 1  # np.max(image2)
        i = image2 / max_val

        image_arr.append(i)
        label_arr.append(label)

    # Diagnostics: save and print the first image for sanity checking.
    plt.imshow(image_arr[0])
    plt.savefig("first_image.pdf", bbox_inches="tight")
    print("first image", image_arr[0])
    print("image_dim", image_arr[0].shape)
    print("image_arr len", len(image_arr))
    print("\n------Starting Graph Generation------\n")

    # BUGFIX: previously border_pxl=50 was hard-coded here, silently
    # ignoring the function parameter (whose default was 0). Pass the
    # parameter through; the default above is 50 so existing callers
    # see identical behavior.
    g_arr, lg_arr = labelled_images_to_graphs(
        np.array(image_arr), label_arr, border_pxl=border_pxl
    )

    # Feature vector per image: 200-bin histogram of line-graph edge
    # data in [-1, 1] (presumably bond-angle cosines — TODO confirm).
    edges = []
    for lg in lg_arr:
        angles = lg.edata["h"].numpy()
        hist, bin_edges = np.histogram(angles, 200, (-1, 1))
        edges.append(hist)
    print("\n------Starting TSNE------\n")

    # NOTE(review): init="warn" is a scikit-learn transitional sentinel,
    # not a real init mode — confirm against the installed sklearn
    # version before changing.
    model = TSNE(
        n_components=2, perplexity=perplexity, random_state=1, init="warn"
    )

    result = model.fit_transform(np.array(edges))

    print("\n------TSNE Done------\n")
    X_embedded = result

    plt.rcParams.update({"font.size": 14})
    plt.figure(figsize=(8, 8))
    X = X_embedded
    x = X[:, 0]
    y = X[:, 1]

    # Map raw labels to dense indices so colors/legend entries align.
    term_list = list(np.array(label_arr))
    term_set = list(set(term_list))
    term_list = [term_set.index(term) for term in term_list]

    color_list = plt.cm.tab10(term_list)

    # Scatter one point per class first so each class appears exactly
    # once in the legend, then plot all points.
    lbls = []
    xyz = []
    for i, j, k, p in zip(x, y, term_list, color_list):
        if k not in lbls:
            lbls.append(k)
            xyz.append([i, j, k])
            plt.scatter(i, j, s=10, c=p, label=term_set[k])

    plt.scatter(x, y, s=10, c=color_list)
    plt.legend(loc="lower left")

    plt.xticks([])
    plt.yticks([])
    if filename is None:
        plt.show()
    else:
        plt.savefig(filename)
        plt.close()


if __name__ == "__main__":
    # Parse CLI arguments, then run both T-SNE variants (pixel-based
    # and graph-feature-based) on the same image folder.
    cli_args = parser.parse_args(sys.argv[1:])
    folder = str(cli_args.data_dir)

    train_tsne(data_dir=folder, filename="tsne.pdf")
    train_tsne_graph(data_dir=folder, filename="tsne_graph.pdf")
Loading

0 comments on commit 567f8a4

Please sign in to comment.