# nn_noisy.py
#
# What happens when we train a neural network on noisy data?
# This script adds Gaussian noise to the Cp data, trains with a
# train/validation split, and plots both loss curves to show overfitting.
#
# CHEG 667-013
# E. M. Furst
import torch
|
|
import torch.nn as nn
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
|
|
# ── Load data ─────────────────────────────────────────────────
|
|
|
|
data = np.loadtxt("data/n2_cp.csv", delimiter=",", skiprows=1)
|
|
T_raw = data[:, 0]
|
|
Cp_raw = data[:, 1]
|
|
|
|
# ── Add noise ─────────────────────────────────────────────────
|
|
|
|
noise_scale = 0.02 # kJ/kg/K — try 0.01, 0.02, 0.05, 0.1
|
|
rng = np.random.default_rng(seed=42)
|
|
Cp_noisy = Cp_raw + rng.normal(scale=noise_scale, size=Cp_raw.size)
|
|
|
|
# ── Train/validation split ────────────────────────────────────
|
|
#
|
|
# Hold out every 4th point for validation. This gives us 26 training
|
|
# points and 9 validation points — enough to see the overfitting signal.
|
|
|
|
val_mask = np.zeros(len(T_raw), dtype=bool)
|
|
val_mask[::4] = True
|
|
train_mask = ~val_mask
|
|
|
|
T_train, Cp_train = T_raw[train_mask], Cp_noisy[train_mask]
|
|
T_val, Cp_val = T_raw[val_mask], Cp_noisy[val_mask]
|
|
|
|
# ── Normalize to [0, 1] using training set statistics ─────────
|
|
|
|
T_min, T_max = T_train.min(), T_train.max()
|
|
Cp_min, Cp_max = Cp_train.min(), Cp_train.max()
|
|
|
|
def normalize_T(T):
|
|
return (T - T_min) / (T_max - T_min)
|
|
|
|
def normalize_Cp(Cp):
|
|
return (Cp - Cp_min) / (Cp_max - Cp_min)
|
|
|
|
def denormalize_Cp(Cp_norm):
|
|
return Cp_norm * (Cp_max - Cp_min) + Cp_min
|
|
|
|
X_train = torch.tensor(normalize_T(T_train), dtype=torch.float32).reshape(-1, 1)
|
|
Y_train = torch.tensor(normalize_Cp(Cp_train), dtype=torch.float32).reshape(-1, 1)
|
|
X_val = torch.tensor(normalize_T(T_val), dtype=torch.float32).reshape(-1, 1)
|
|
Y_val = torch.tensor(normalize_Cp(Cp_val), dtype=torch.float32).reshape(-1, 1)
|
|
|
|
# ── Define the network ────────────────────────────────────────
|
|
|
|
H = 10 # try 10, 20, 50 — watch what happens
|
|
|
|
model = nn.Sequential(
|
|
nn.Linear(1, H),
|
|
nn.Tanh(),
|
|
nn.Linear(H, 1),
|
|
)
|
|
|
|
n_params = sum(p.numel() for p in model.parameters())
|
|
print(f"Network: 1 -> {H} (tanh) -> 1")
|
|
print(f"Parameters: {n_params}")
|
|
print(f"Training points: {len(T_train)}")
|
|
print(f"Validation points: {len(T_val)}")
|
|
print(f"Noise scale: {noise_scale} kJ/kg/K\n")
|
|
|
|
# ── Training ──────────────────────────────────────────────────
|
|
|
|
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
|
|
loss_fn = nn.MSELoss()
|
|
|
|
epochs = 10000
|
|
log_interval = 1000
|
|
train_losses = []
|
|
val_losses = []
|
|
best_val_loss = float('inf')
|
|
best_epoch = 0
|
|
|
|
for epoch in range(epochs):
|
|
# --- Training step ---
|
|
model.train()
|
|
Y_pred = model(X_train)
|
|
train_loss = loss_fn(Y_pred, Y_train)
|
|
|
|
optimizer.zero_grad()
|
|
train_loss.backward()
|
|
optimizer.step()
|
|
|
|
# --- Validation step (no gradient computation) ---
|
|
model.eval()
|
|
with torch.no_grad():
|
|
val_pred = model(X_val)
|
|
val_loss = loss_fn(val_pred, Y_val)
|
|
|
|
train_losses.append(train_loss.item())
|
|
val_losses.append(val_loss.item())
|
|
|
|
# Track the best validation loss — same idea as nanoGPT's train.py
|
|
if val_loss.item() < best_val_loss:
|
|
best_val_loss = val_loss.item()
|
|
best_epoch = epoch
|
|
|
|
if epoch % log_interval == 0 or epoch == epochs - 1:
|
|
print(f"Epoch {epoch:5d} Train: {train_loss.item():.6f} "
|
|
f"Val: {val_loss.item():.6f}")
|
|
|
|
print(f"\nBest validation loss: {best_val_loss:.6f} at epoch {best_epoch}")
|
|
|
|
# ── Results ───────────────────────────────────────────────────
|
|
|
|
T_fine = torch.linspace(0, 1, 200).reshape(-1, 1)
|
|
model.eval()
|
|
with torch.no_grad():
|
|
Cp_pred_norm = model(T_fine)
|
|
|
|
T_fine_K = T_fine.numpy() * (T_max - T_min) + T_min
|
|
Cp_pred = denormalize_Cp(Cp_pred_norm.numpy())
|
|
|
|
# ── Plot ──────────────────────────────────────────────────────
|
|
|
|
fig, axes = plt.subplots(1, 3, figsize=(16, 5))
|
|
|
|
# Left: the fit
|
|
ax = axes[0]
|
|
ax.plot(T_train, Cp_train, 'ko', markersize=6, label='Train (noisy)')
|
|
ax.plot(T_val, Cp_val, 'bs', markersize=6, label='Validation (noisy)')
|
|
ax.plot(T_raw, Cp_raw, 'g--', linewidth=1, alpha=0.7, label='True (NIST)')
|
|
ax.plot(T_fine_K, Cp_pred, 'r-', linewidth=2, label=f'NN ({H} neurons)')
|
|
ax.set_xlabel('Temperature (K)')
|
|
ax.set_ylabel('$C_p$ (kJ/kg/K)')
|
|
ax.set_title(f'Noisy $C_p(T)$ — noise = {noise_scale}')
|
|
ax.legend(fontsize=8)
|
|
|
|
# Middle: training loss
|
|
ax = axes[1]
|
|
ax.semilogy(train_losses, label='Train loss')
|
|
ax.set_xlabel('Epoch')
|
|
ax.set_ylabel('MSE')
|
|
ax.set_title('Training Loss')
|
|
ax.legend()
|
|
|
|
# Right: train vs. validation loss
|
|
ax = axes[2]
|
|
ax.semilogy(train_losses, label='Train loss')
|
|
ax.semilogy(val_losses, label='Validation loss')
|
|
ax.axvline(best_epoch, color='gray', linestyle='--', alpha=0.5,
|
|
label=f'Best val (epoch {best_epoch})')
|
|
ax.set_xlabel('Epoch')
|
|
ax.set_ylabel('MSE')
|
|
ax.set_title('Train vs. Validation Loss')
|
|
ax.legend(fontsize=8)
|
|
|
|
plt.tight_layout()
|
|
plt.savefig('nn_fit_noisy.png', dpi=150)
|
|
plt.show()
|