Skip to content

Commit

Permalink
finish mdn and add a test data
Browse files Browse the repository at this point in the history
  • Loading branch information
pohaoc2 committed Nov 25, 2024
1 parent 477e8b4 commit dc35144
Show file tree
Hide file tree
Showing 13 changed files with 2,977 additions and 2,050 deletions.
501 changes: 501 additions & 0 deletions data/ARCADE/test_data.csv

Large diffs are not rendered by default.

Binary file added mdn_results.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4,170 changes: 2,175 additions & 1,995 deletions poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ hydra-core = "^1.2.0"
black = "23.1.0"
mypy = "1.5.0"
statsmodels = "^0.14.0"
torch = "^2.5.1"

[tool.poetry.dev-dependencies]
black = "^23.1.0"
Expand Down
172 changes: 172 additions & 0 deletions sandbox/mdn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Set random seeds for reproducibility
np.random.seed(42)
torch.manual_seed(42)

# Generate 1D training data
def generate_data(n_samples=200):
    """Draw a sorted 1D input sample with a two-branch (bimodal) response.

    Inputs are sampled uniformly from [-3, 3] and sorted ascending. Each
    response is ``0.5 * sin(x)`` shifted to either +5 or -5 (a fair coin
    flip per sample) plus Gaussian noise with standard deviation 0.5.

    Randomness comes from the global ``np.random`` state, so seed it
    beforehand for reproducible output.

    Args:
        n_samples: Number of (x, y) pairs to generate.

    Returns:
        Tuple ``(x, y)`` of 1D float arrays, each of length ``n_samples``.
    """
    inputs = np.sort(np.random.uniform(-3, 3, n_samples))  # sorted for plotting
    targets = np.zeros_like(inputs)

    for idx, value in enumerate(inputs):
        # The coin flip is drawn before the noise term; keeping this order
        # keeps the random stream identical for a given seed.
        offset = 5 if np.random.rand() > 0.5 else -5
        targets[idx] = 0.5 * np.sin(value) + offset + 0.5 * np.random.randn()

    return inputs, targets

# MDN Model
class MDN(nn.Module):
    """Mixture density network with a shared trunk and three linear heads.

    A two-layer ReLU trunk feeds separate linear heads that produce the
    mixing coefficients, means and standard deviations of ``n_gaussians``
    one-dimensional Gaussian components.

    Args:
        input_dim: Number of input features.
        hidden_dim: Width of both hidden layers.
        n_gaussians: Number of mixture components.
    """

    def __init__(self, input_dim=1, hidden_dim=15, n_gaussians=2):
        super().__init__()
        self.n_gaussians = n_gaussians

        # Layers are created in the order trunk -> pi -> mu -> sigma so that
        # seeded parameter initialisation stays reproducible.
        trunk_layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
        ]
        self.hidden = nn.Sequential(*trunk_layers)

        self.pi = nn.Linear(hidden_dim, n_gaussians)
        self.mu = nn.Linear(hidden_dim, n_gaussians)
        self.sigma = nn.Linear(hidden_dim, n_gaussians)

    def forward(self, x):
        """Return ``(pi, mu, sigma)`` for a batch of inputs.

        ``pi`` is softmax-normalised along dim 1, ``mu`` is the raw head
        output, and ``sigma`` is the exponential of its head output (hence
        strictly positive).
        """
        features = self.hidden(x)
        mixing_logits = self.pi(features)
        log_sigma = self.sigma(features)
        return (
            torch.softmax(mixing_logits, dim=1),
            self.mu(features),
            torch.exp(log_sigma),
        )

# Generate data: 500 training points plus two placeholder columns
# (LAYOUT is a random 0/1 flag, COMPONENTS is the constant 1).
x_train, y_train = generate_data(500)
layout_dummy = np.random.randint(0, 2, 500)
component = [1] * 500

# Assemble all four columns in a single stack and persist them as CSV.
# The output path is relative to the current working directory.
data = np.column_stack((x_train, y_train, layout_dummy, component))
df = pd.DataFrame(data, columns=["X_train", "y_train", "LAYOUT", "COMPONENTS"])
df.to_csv("data/ARCADE/test_data.csv", index=False)

# Column tensors of shape (N, 1) for the network.
X, Y = (torch.FloatTensor(values).reshape(-1, 1) for values in (x_train, y_train))

# Create data loader
dataset = TensorDataset(X, Y)
loader = DataLoader(dataset, shuffle=True, batch_size=32)

# Initialize model and optimizer
model = MDN(input_dim=1, hidden_dim=15, n_gaussians=2)
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Loss function
def mdn_loss(pi, mu, sigma, y):
    """Mean negative log-likelihood of ``y`` under a 1D Gaussian mixture.

    Args:
        pi: Mixing coefficients, shape ``(batch, n_gaussians)``.
        mu: Component means, shape ``(batch, n_gaussians)``.
        sigma: Component standard deviations, shape ``(batch, n_gaussians)``.
        y: Targets, shape ``(batch,)`` or ``(batch, 1)``.

    Returns:
        Scalar tensor: the batch mean of ``-log(sum_k pi_k * N(y | mu_k,
        sigma_k) + 1e-6)``.

    Raises:
        ValueError: If ``y`` cannot be interpreted as one scalar target per
            row of ``mu``.
    """
    # Bring y to (batch, 1) so it broadcasts against (batch, n_gaussians)
    # row by row. Unsqueezing a (batch, 1) target would give (batch, 1, 1),
    # which broadcasts to (batch, batch, n_gaussians) and makes the dim=1
    # sum below run over the batch instead of the mixture components.
    if y.dim() == 1:
        y = y.unsqueeze(1)
    if y.dim() != 2 or y.shape[1] != 1 or y.shape[0] != mu.shape[0]:
        raise ValueError(
            f"y must have shape (batch,) or (batch, 1) with batch == "
            f"{mu.shape[0]}, got {tuple(y.shape)}"
        )

    gaussian_prob = (1.0 / (sigma * np.sqrt(2 * np.pi))) * \
        torch.exp(-0.5 * ((y - mu) / sigma) ** 2)
    weighted_prob = pi * gaussian_prob
    # The 1e-6 floor keeps log() finite when every component density underflows.
    return -torch.log(torch.sum(weighted_prob, dim=1) + 1e-6).mean()

# Training loop: 200 passes over the loader, recording the mean batch loss
# of each epoch in `losses`.
n_epochs = 200
losses = []

for epoch in range(n_epochs):
    batch_losses = []
    for batch_x, batch_y in loader:
        optimizer.zero_grad()
        pi, mu, sigma = model(batch_x)
        loss = mdn_loss(pi, mu, sigma, batch_y)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())

    epoch_loss = sum(batch_losses)
    losses.append(epoch_loss / len(loader))

    # Progress report every 100 epochs.
    if not (epoch + 1) % 100:
        print(f'Epoch {epoch+1}/{n_epochs}, Loss: {losses[-1]:.4f}')

# Generate predictions for visualization on a fresh 200-point sample.
model.eval()
X_test, y_test = (
    torch.FloatTensor(values).reshape(-1, 1) for values in generate_data(200)
)

# Run the network without gradient tracking and move the outputs to NumPy.
with torch.no_grad():
    pi, mu, sigma = (output.numpy() for output in model(X_test))

# Calculate R2 score of the mixture mean (sum over components of pi * mu)
# against the sampled responses.
from sklearn.metrics import r2_score

y_pred = (pi * mu).sum(axis=1)
print(y_pred)
r2 = r2_score(y_test, y_pred)
print(f"\nR2 Score: {r2:.4f}")
X_test = X_test.numpy()

# Plotting: a two-panel figure (component means with +/-2 sigma bands on top,
# mixing coefficients below), written to mdn_results.png and then displayed.
plt.figure(figsize=(15, 10))

# Plot 1: Training Data and Predictions
plt.subplot(2, 1, 1)

# Plot training data
plt.scatter(x_train, y_train, alpha=0.5, label='Training Data', color='blue', s=20)

# Plot predicted means and uncertainties for each Gaussian component
colors = ['red', 'green']  # one colour per component; the model uses 2 Gaussians
for i in range(model.n_gaussians):
    # Plot mean
    plt.plot(X_test, mu[:, i], color=colors[i],
             label=f'Mean (Gaussian {i+1})', linewidth=2)

    # Plot uncertainty bounds (±2 sigma)
    plt.fill_between(X_test.flatten(),
                     mu[:, i] - 2*sigma[:, i],
                     mu[:, i] + 2*sigma[:, i],
                     color=colors[i], alpha=0.2,
                     label=f'Uncertainty (Gaussian {i+1})')

plt.xlabel('Input (x)')
plt.ylabel('Output (y)')
plt.title('Training Data with Predicted Means and Uncertainties')
plt.legend()
plt.grid(True, alpha=0.3)

# Plot 2: Mixing Coefficients
plt.subplot(2, 1, 2)
for i in range(model.n_gaussians):
    plt.plot(X_test, pi[:, i], color=colors[i],
             label=f'π{i+1}', linewidth=2)
plt.xlabel('Input (x)')
plt.ylabel('Mixing Coefficient (π)')
plt.title('Mixing Coefficients vs Input')
plt.legend()
plt.grid(True, alpha=0.3)

plt.tight_layout()
# Save before showing: with a blocking GUI backend the figure is gone once
# show() returns, so saving afterwards would write a blank image.
plt.savefig('mdn_results.png')
plt.show()

# Print example predictions for specific x values.
test_points = [-1, 0, 1]
print("\nExample predictions at specific points:")
# A single no-grad scope covers every query; nothing here needs gradients.
with torch.no_grad():
    for x_val in test_points:
        # Single-sample batch of shape (1, 1).
        x = torch.full((1, 1), x_val, dtype=torch.float32)
        pi, mu, sigma = model(x)
        print(f"\nx = {x_val}")
        print(f"Mixing coefficients (π): {pi.numpy().flatten()}")
        print(f"Means (μ): {mu.numpy().flatten()}")
        print(f"Standard deviations (σ): {sigma.numpy().flatten()}")
Binary file added sandbox/mdn_results.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
34 changes: 14 additions & 20 deletions src/conf/cs/models/mdn.yaml
Original file line number Diff line number Diff line change
@@ -1,25 +1,19 @@
mlp:
mdn:
continuous:
alpha:
learning_rate:
type: float
range: [0.0001, 1]
range: [0.001, 0.01]
search: log

discrete:
activation:
- identity
- logistic
- tanh
- relu
hidden_layer_sizes:
- (5,)
- (5,5)
- (5,10)
- (25,)
- (25, 25)
- (25, 50)
- (50,)
- (50, 25)
- (50, 50)
hidden_dim:
- 16
num_gaussians:
- 2
output_dim:
- 1

static:
solver: lbfgs
max_iter: 1000
epochs: 500


16 changes: 16 additions & 0 deletions src/conf/cs/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
defaults:
- models/mdn
- _self_
experiments:
TEST-MDN:
files:
data: test_data.csv
paths:
log: ${hydra:runtime.cwd}/logs/ARCADE/test/
data: ${hydra:runtime.cwd}/data/ARCADE/
results: ${hydra:runtime.cwd}/results/ARCADE/test/
data:
features:
- X_train
response:
- y_train
5 changes: 1 addition & 4 deletions src/conf/cs/topological_0_metric_8.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
defaults:
- models/mlr
- models/rf
- models/svr
- models/mlp
- models/mdn
- _self_
experiments:
COUPLED-C:
Expand Down
14 changes: 12 additions & 2 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,17 @@ def main(config: DictConfig) -> None:
experiment.save_train_test()
experiment.save_manifest()
experiment.run()


model = experiment._best_models['MDN'].model.model
test_points = [-1, 0, 1]
print("\nExample predictions at specific points:")
import torch
for x_val in test_points:
x = torch.tensor([[x_val]], dtype=torch.float32)
with torch.no_grad():
pi, mu, sigma = model.forward(x)
print(f"\nx = {x_val}")
print(f"Mixing coefficients (π): {pi.numpy().flatten()}")
print(f"Means (μ): {mu.numpy().flatten()}")
print(f"Standard deviations (σ): {sigma.numpy().flatten()}")
if __name__ == "__main__":
main()
3 changes: 2 additions & 1 deletion src/config_utils/assign_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@
from permutation.models.rf import RF
from permutation.models.svr import SVReg
from permutation.models.mlp import MLP
from permutation.models.mdn import MDNReg
from permutation.models.modelprotocol import Model
from permutation.models.hyperparameters import HParams

MODEL_DEPENDENCIES = {"mlr": MLR, "rf": RF, "svr": SVReg, "mlp": MLP}
MODEL_DEPENDENCIES = {"mlr": MLR, "rf": RF, "svr": SVReg, "mlp": MLP, "mdn": MDNReg}


def _get_correct_model(model_type: str, hparams: HParams) -> Model:
Expand Down
Loading

0 comments on commit dc35144

Please sign in to comment.