# nn_numpy.py
#
# A neural network with one hidden layer, built from scratch using numpy.
# Fits Cp(T) data for nitrogen gas at 1 bar (NIST WebBook).
#
# This demonstrates the core mechanics of a neural network:
# - Forward pass: input -> hidden layer -> activation -> output
# - Loss calculation (mean squared error)
# - Backpropagation: computing gradients of the loss w.r.t. each weight
# - Gradient descent: updating weights to minimize loss
#
# CHEG 667-013
# E. M. Furst
import numpy as np
import matplotlib.pyplot as plt

# ── Load and prepare data ──────────────────────────────────────

# Cp(T) for N2 at 1 bar, exported from the NIST WebBook; first row is a header.
data = np.loadtxt("data/n2_cp.csv", delimiter=",", skiprows=1)
T_raw, Cp_raw = data[:, 0], data[:, 1]  # temperature (K), heat capacity (kJ/kg/K)

# Min-max scale both columns onto [0, 1].  Gradient descent behaves far
# better when inputs and targets are small and comparable in magnitude.
T_min, T_max = T_raw.min(), T_raw.max()
Cp_min, Cp_max = Cp_raw.min(), Cp_raw.max()

T = (T_raw - T_min) / (T_max - T_min)       # normalized temperature, shape (N,)
Cp = (Cp_raw - Cp_min) / (Cp_max - Cp_min)  # normalized heat capacity, shape (N,)

# One sample per row so the layers can be written as matrix products.
X = T.reshape(-1, 1)   # input matrix,  (N, 1)
Y = Cp.reshape(-1, 1)  # target matrix, (N, 1)

N = X.shape[0]  # number of data points
# ── Network architecture ───────────────────────────────────────
#
# Input (1) --> Hidden (H neurons, tanh) --> Output (1)
#
# Each hidden neuron computes a weighted sum z = w * x + b and then
# applies tanh(z); the tanh is what lets the network represent curves
# instead of just straight lines.  The output layer is a plain linear
# combination of the hidden activations: y_pred = W2 @ a + b2.

H = 10  # number of neurons in the hidden layer

# Seed the RNG so every run starts from the same random weights.
np.random.seed(42)

# Small random weights, zero biases:
#   W1 (1, H): input -> each hidden neuron     b1 (1, H): hidden biases
#   W2 (H, 1): hidden neurons -> output        b2 (1, 1): output bias
W1 = np.random.randn(1, H) * 0.5
b1 = np.zeros((1, H))
W2 = np.random.randn(H, 1) * 0.5
b2 = np.zeros((1, 1))
# ── Training parameters ───────────────────────────────────────

learning_rate = 0.01  # gradient-descent step size
epochs = 5000         # full passes over the dataset
log_interval = 500    # print the loss once every this many epochs
# ── Training loop ─────────────────────────────────────────────

losses = []  # MSE history, one entry per epoch

for epoch in range(epochs):

    # ── Forward pass ──────────────────────────────────────────
    # Hidden layer: affine map, then tanh to introduce nonlinearity.
    hidden_in = X @ W1 + b1        # pre-activations, (N, H)
    hidden_out = np.tanh(hidden_in)  # activations, (N, H)
    # Output layer is linear -- this is a regression, no activation.
    pred = hidden_out @ W2 + b2    # predictions, (N, 1)

    # ── Loss: mean squared error ──────────────────────────────
    resid = pred - Y               # (N, 1)
    loss = np.mean(resid ** 2)
    losses.append(loss)

    # ── Backpropagation ───────────────────────────────────────
    # Chain rule, working backward from the loss toward the input.
    g_pred = 2 * resid / N                            # dL/dpred, (N, 1)

    # Output-layer gradients.
    g_W2 = hidden_out.T @ g_pred                      # (H, 1)
    g_b2 = np.sum(g_pred, axis=0, keepdims=True)      # (1, 1)

    # Push the gradient back through the hidden layer;
    # tanh'(z) = 1 - tanh(z)^2, and hidden_out already holds tanh(z).
    g_hidden = (g_pred @ W2.T) * (1 - hidden_out ** 2)  # (N, H)

    # Hidden-layer gradients.
    g_W1 = X.T @ g_hidden                             # (1, H)
    g_b1 = np.sum(g_hidden, axis=0, keepdims=True)    # (1, H)

    # ── Gradient descent ──────────────────────────────────────
    # Step each weight opposite its gradient to reduce the loss.
    W2 -= learning_rate * g_W2
    b2 -= learning_rate * g_b2
    W1 -= learning_rate * g_W1
    b1 -= learning_rate * g_b1

    if epoch % log_interval == 0 or epoch == epochs - 1:
        print(f"Epoch {epoch:5d} Loss: {loss:.6f}")
# ── Results ────────────────────────────────────────────────────

# Run the trained network over a dense grid of normalized temperatures
# so the fitted curve plots smoothly between the data points.
T_fine = np.linspace(0, 1, 200).reshape(-1, 1)
A1_fine = np.tanh(T_fine @ W1 + b1)
Cp_pred_norm = A1_fine @ W2 + b2

# Undo the min-max scaling to recover kelvin and kJ/kg/K.
T_fine_K = T_fine * (T_max - T_min) + T_min
Cp_pred = Cp_pred_norm * (Cp_max - Cp_min) + Cp_min
# ── Plot ───────────────────────────────────────────────────────

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: NIST data points with the fitted curve on top.
ax1.plot(T_raw, Cp_raw, 'ko', markersize=6, label='NIST data')
ax1.plot(T_fine_K, Cp_pred, 'r-', linewidth=2, label=f'NN ({H} neurons)')
ax1.set_title('$C_p(T)$ for N$_2$ at 1 bar')
ax1.set_xlabel('Temperature (K)')
ax1.set_ylabel('$C_p$ (kJ/kg/K)')
ax1.legend()

# Right panel: loss history on a log scale to show convergence.
ax2.semilogy(losses)
ax2.set_title('Training Loss')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Mean Squared Error')

plt.tight_layout()
plt.savefig('nn_fit.png', dpi=150)
plt.show()

# Final summary.
print(f"\nFinal loss: {losses[-1]:.6f}")
print(f"Network: {1} input -> {H} hidden (tanh) -> {1} output")
print(f"Total parameters: {W1.size + b1.size + W2.size + b2.size}")