llm-workshop/05-neural-networks/nn_numpy.py
Eric 1604671d36 Initial commit: LLM workshop materials
Five modules covering nanoGPT, Ollama, RAG, semantic search, and neural networks.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-28 07:11:01 -04:00

156 lines
5.8 KiB
Python

# nn_numpy.py
#
# A neural network with one hidden layer, built from scratch using numpy.
# Fits Cp(T) data for nitrogen gas at 1 bar (NIST WebBook).
#
# This demonstrates the core mechanics of a neural network:
# - Forward pass: input -> hidden layer -> activation -> output
# - Loss calculation (mean squared error)
# - Backpropagation: computing gradients of the loss w.r.t. each weight
# - Gradient descent: updating weights to minimize loss
#
# CHEG 667-013
# E. M. Furst
import numpy as np
import matplotlib.pyplot as plt
# ── Load and prepare data ──────────────────────────────────────
# Columns: temperature (K), heat capacity (kJ/kg/K) — NIST WebBook, N2 at 1 bar.
data = np.loadtxt("data/n2_cp.csv", delimiter=",", skiprows=1)
T_raw, Cp_raw = data[:, 0], data[:, 1]

# Rescale both variables onto [0, 1]: gradient descent behaves much better
# when inputs and targets are small and comparable in magnitude.
T_min, T_max = T_raw.min(), T_raw.max()
Cp_min, Cp_max = Cp_raw.min(), Cp_raw.max()
T = (T_raw - T_min) / (T_max - T_min)      # normalized temperature, shape (N,)
Cp = (Cp_raw - Cp_min) / (Cp_max - Cp_min) # normalized heat capacity, shape (N,)

# One sample per row so the layer operations below are plain matrix products.
X = T.reshape(-1, 1)   # (N, 1) input matrix
Y = Cp.reshape(-1, 1)  # (N, 1) target matrix
N = X.shape[0]         # number of data points
# ── Network architecture ───────────────────────────────────────
#
# Input (1) --> Hidden (H neurons, tanh) --> Output (1)
#
# Each hidden neuron computes z = w*x + b, then a = tanh(z); the tanh
# supplies the nonlinearity. The output layer is a linear combination
# of the hidden activations: y_pred = W2 @ a + b2.
H = 10  # hidden-layer width

# Parameter shapes:
#   W1 (1, H): input -> hidden       b1 (1, H): one bias per hidden neuron
#   W2 (H, 1): hidden -> output      b2 (1, 1): output bias
# Small random starting weights break the symmetry between neurons;
# the fixed seed makes every run reproducible.
np.random.seed(42)
init_scale = 0.5
W1 = init_scale * np.random.randn(1, H)
b1 = np.zeros((1, H))
W2 = init_scale * np.random.randn(H, 1)
b2 = np.zeros((1, 1))

# ── Training parameters ───────────────────────────────────────
learning_rate = 0.01  # gradient-descent step size
epochs = 5000         # number of full passes over the data
log_interval = 500    # epochs between progress printouts
# ── Training loop ─────────────────────────────────────────────
# Full-batch gradient descent: forward pass, MSE loss, backprop, update.
# (Indentation of the loop body restored — as pasted, the body sat at
# column 0 and the script would not run.)
losses = []
for epoch in range(epochs):
    # ── Forward pass ──────────────────────────────────────────
    Z1 = X @ W1 + b1       # (N, H) hidden pre-activation
    A1 = np.tanh(Z1)       # (N, H) hidden activation (nonlinearity)
    Y_pred = A1 @ W2 + b2  # (N, 1) linear output layer — no activation

    # ── Loss: mean squared error ──────────────────────────────
    error = Y_pred - Y  # (N, 1)
    loss = np.mean(error ** 2)
    losses.append(loss)

    # ── Backpropagation ───────────────────────────────────────
    # Chain rule applied backward from the loss to each parameter.
    dL_dYpred = 2 * error / N  # d(MSE)/d(Y_pred), (N, 1)

    # Output-layer gradients.
    dL_dW2 = A1.T @ dL_dYpred                          # (H, 1)
    dL_db2 = np.sum(dL_dYpred, axis=0, keepdims=True)  # (1, 1)

    # Gradient flowing back into the hidden layer.
    dL_dA1 = dL_dYpred @ W2.T  # (N, H)
    # tanh'(z) = 1 - tanh(z)^2; A1 already holds tanh(Z1), so reuse it.
    dL_dZ1 = dL_dA1 * (1 - A1 ** 2)  # (N, H)

    # Hidden-layer gradients.
    dL_dW1 = X.T @ dL_dZ1                            # (1, H)
    dL_db1 = np.sum(dL_dZ1, axis=0, keepdims=True)   # (1, H)

    # ── Gradient descent ──────────────────────────────────────
    # Step each parameter opposite its gradient to reduce the loss.
    W2 -= learning_rate * dL_dW2
    b2 -= learning_rate * dL_db2
    W1 -= learning_rate * dL_dW1
    b1 -= learning_rate * dL_db1

    if epoch % log_interval == 0 or epoch == epochs - 1:
        print(f"Epoch {epoch:5d} Loss: {loss:.6f}")
# ── Results ────────────────────────────────────────────────────
# Run the trained network over a dense grid of normalized temperatures
# so the fitted curve plots smoothly.
t_grid = np.linspace(0, 1, 200).reshape(-1, 1)
hidden = np.tanh(t_grid @ W1 + b1)
pred_norm = hidden @ W2 + b2

# Undo the [0, 1] normalization to recover physical units.
T_fine_K = t_grid * (T_max - T_min) + T_min
Cp_pred = pred_norm * (Cp_max - Cp_min) + Cp_min
# ── Plot ───────────────────────────────────────────────────────
fig, (ax_fit, ax_loss) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: measured data against the network's fit, in physical units.
ax_fit.plot(T_raw, Cp_raw, 'ko', markersize=6, label='NIST data')
ax_fit.plot(T_fine_K, Cp_pred, 'r-', linewidth=2, label=f'NN ({H} neurons)')
ax_fit.set_xlabel('Temperature (K)')
ax_fit.set_ylabel('$C_p$ (kJ/kg/K)')
ax_fit.set_title('$C_p(T)$ for N$_2$ at 1 bar')
ax_fit.legend()

# Right panel: loss history on a log scale to show convergence.
ax_loss.semilogy(losses)
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Mean Squared Error')
ax_loss.set_title('Training Loss')

plt.tight_layout()
plt.savefig('nn_fit.png', dpi=150)
plt.show()

# Console summary of the trained model.
print(f"\nFinal loss: {losses[-1]:.6f}")
print(f"Network: {1} input -> {H} hidden (tanh) -> {1} output")
print(f"Total parameters: {W1.size + b1.size + W2.size + b2.size}")