# -*- coding: utf-8 -*-
"""chaitanya_part_2_observegradient.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/17qzBPPy81cwF6YVJmRHePbv8OSmcnx3a
"""
# pip install tensorflow==2.4  # Colab cell command; run from a shell if needed (a bare "pip" line is invalid Python)
# Commented out IPython magic to ensure Python compatibility.
# This file calculates gradient norm during the training of a DNN
import tensorflow as tf
import numpy as np
import torch
import torchvision as tv
from torchvision import transforms, datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
# %matplotlib inline
# Create random data in (-10, 10) and compute the ground truth
# Ground-truth function: y = arcsinh(5*pi*x)
simulatedInput = 20 * torch.rand((1000, 1)) - 10
groundTruth = torch.asinh(5 * np.pi * simulatedInput)  # torch.asinh keeps the target a torch tensor for the loss
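# Optional sanity check (not in the original notebook): input and target
# should both be (1000, 1) so MSELoss compares them elementwise without broadcasting.
assert simulatedInput.shape == groundTruth.shape == (1000, 1)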
# Calculate the number of trainable parameters in a neural network
def calcParams(inputModel):
    val = sum(params.numel() for params in inputModel.parameters() if params.requires_grad)
    return val
# Set up NN for regressing arcsinh(5*pi*x) - 3 hidden layers, 209 parameters
class GradientNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(1, 8)
        self.fc2 = nn.Linear(8, 12)
        self.fc3 = nn.Linear(12, 6)
        self.fc4 = nn.Linear(6, 1)

    def forward(self, val):
        val = F.relu(self.fc1(val))
        val = F.relu(self.fc2(val))
        val = F.relu(self.fc3(val))
        val = self.fc4(val)
        return val
model1 = GradientNN()
print(calcParams(model1))
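# Worked count (added for clarity): each nn.Linear(i, o) has i*o weights + o biases, so
# (1*8 + 8) + (8*12 + 12) + (12*6 + 6) + (6*1 + 1) = 16 + 108 + 78 + 7 = 209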
# Set up necessary auxiliaries for neural net training
gradNet = GradientNN()
costFunc = nn.MSELoss()
opt = optim.Adam(gradNet.parameters(), lr=0.001)
EPOCHS = 2000
# Train network
costList = []
gradNormList = []
counterList = []
counter = 1
for index in range(EPOCHS):
    counterList.append(counter)
    counter += 1
    gradNet.zero_grad()
    output = gradNet(simulatedInput)
    cost = costFunc(output, groundTruth)
    costList.append(cost.item())  # store the scalar loss, not the graph-attached tensor
    cost.backward()
    opt.step()

    # Get gradient norm (from slides): L2 norm over all parameter gradients
    gradAll = 0.0
    for p in gradNet.parameters():
        grad = 0.0
        if p.grad is not None:
            grad = (p.grad.cpu().data.numpy() ** 2).sum()
        gradAll += grad
    gradNorm = gradAll ** 0.5
    gradNormList.append(gradNorm)
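# Optional cross-check (a sketch, not from the original notebook): the same L2 norm
# can be computed in torch by stacking per-parameter gradient norms from the final
# step; it should match gradNormList[-1] up to floating-point error.
finalGradNorm = torch.norm(
    torch.stack([p.grad.detach().norm(2)
                 for p in gradNet.parameters() if p.grad is not None]), 2).item()
print("torch cross-check:", finalGradNorm, "loop value:", gradNormList[-1])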
# Visualize the training progression for arcsinh(5*pi*x)
plt.plot(counterList, costList, 'y', label='Model')
plt.title("Learning Progression for arcsinh(5*pi*x)")
plt.xlabel("EPOCHS")
plt.ylabel("Mean Squared Error")
plt.legend(loc="upper right")
plt.show()
# Visualize the gradient norm for arcsinh(5*pi*x) during training
plt.plot(counterList, gradNormList, 'y', label='Model')
plt.title("Gradient Norm during Training for arcsinh(5*pi*x)")
plt.xlabel("EPOCHS")
plt.ylabel("Gradient Norm")
plt.legend(loc="upper right")
plt.show()