# nn_numpy.py
#
# A neural network with one hidden layer, built from scratch using numpy.
# Fits Cp(T) data for nitrogen gas at 1 bar (NIST WebBook).
#
# This demonstrates the core mechanics of a neural network:
#   - Forward pass: input -> hidden layer -> activation -> output
#   - Loss calculation (mean squared error)
#   - Backpropagation: computing gradients of the loss w.r.t. each weight
#   - Gradient descent: updating weights to minimize loss
#
# CHEG 667-013
# E. M. Furst

import numpy as np
import matplotlib.pyplot as plt

# ── Load and prepare data ──────────────────────────────────────
data = np.loadtxt("data/n2_cp.csv", delimiter=",", skiprows=1)
T_raw = data[:, 0]   # Temperature (K)
Cp_raw = data[:, 1]  # Heat capacity (kJ/kg/K)

# Min-max scale both axes onto [0, 1]; gradient descent behaves
# much better when inputs and targets are O(1) and non-extreme.
T_min, T_max = T_raw.min(), T_raw.max()
Cp_min, Cp_max = Cp_raw.min(), Cp_raw.max()
T = (T_raw - T_min) / (T_max - T_min)       # shape: (N,)
Cp = (Cp_raw - Cp_min) / (Cp_max - Cp_min)  # shape: (N,)

# One sample per row, so the entire dataset flows through the
# network as a single matrix product.
X = T.reshape(-1, 1)   # (N, 1) -- input matrix
Y = Cp.reshape(-1, 1)  # (N, 1) -- target matrix
N = X.shape[0]         # number of data points

# ── Network architecture ───────────────────────────────────────
#
#   Input (1) --> Hidden (H neurons, tanh) --> Output (1)
#
# Each hidden neuron computes
#   z = w * x + b       (weighted sum)
#   a = tanh(z)         (activation -- introduces nonlinearity)
# and the output layer is a linear combination of the activations:
#   y_pred = W2 @ a + b2
H = 10  # number of neurons in the hidden layer

# Small random initial weights break the symmetry between neurons;
# the fixed seed makes every run reproducible.
#   W1: (1, H) -- connects input to each hidden neuron
#   b1: (1, H) -- one bias per hidden neuron
#   W2: (H, 1) -- connects hidden neurons to output
#   b2: (1, 1) -- output bias
np.random.seed(42)
W1 = np.random.randn(1, H) * 0.5
b1 = np.zeros((1, H))
W2 = np.random.randn(H, 1) * 0.5
b2 = np.zeros((1, 1))

# ── Training parameters ────────────────────────────────────────
learning_rate = 0.01
epochs = 5000
log_interval = 500

# ── Training loop ──────────────────────────────────────────────
losses = []
for epoch in range(epochs):
    # Forward pass: push all N samples through at once.
    Z1 = X @ W1 + b1       # (N, H) hidden pre-activation
    A1 = np.tanh(Z1)       # (N, H) hidden activation
    Y_pred = A1 @ W2 + b2  # (N, 1) linear output (no activation)

    # Mean squared error over the batch.
    error = Y_pred - Y     # (N, 1)
    loss = np.mean(error ** 2)
    losses.append(loss)

    # Backpropagation: apply the chain rule, working backward
    # from the loss toward each weight.
    dL_dYpred = 2 * error / N                          # (N, 1)

    # Output-layer gradients.
    dL_dW2 = A1.T @ dL_dYpred                          # (H, 1)
    dL_db2 = np.sum(dL_dYpred, axis=0, keepdims=True)  # (1, 1)

    # Gradient flowing back into the hidden layer; note that
    # d/dz tanh(z) = 1 - tanh(z)^2 and A1 already holds tanh(z).
    dL_dA1 = dL_dYpred @ W2.T                          # (N, H)
    dL_dZ1 = dL_dA1 * (1 - A1 ** 2)                    # (N, H)

    # Hidden-layer gradients.
    dL_dW1 = X.T @ dL_dZ1                              # (1, H)
    dL_db1 = np.sum(dL_dZ1, axis=0, keepdims=True)     # (1, H)

    # Gradient descent: step every parameter downhill.
    W2 -= learning_rate * dL_dW2
    b2 -= learning_rate * dL_db2
    W1 -= learning_rate * dL_dW1
    b1 -= learning_rate * dL_db1

    if epoch % log_interval == 0 or epoch == epochs - 1:
        print(f"Epoch {epoch:5d} Loss: {loss:.6f}")

# ── Results ────────────────────────────────────────────────────
# Evaluate the trained network on a dense grid for a smooth curve.
T_fine = np.linspace(0, 1, 200).reshape(-1, 1)
A1_fine = np.tanh(T_fine @ W1 + b1)
Cp_pred_norm = A1_fine @ W2 + b2

# Undo the min-max scaling to recover physical units.
T_fine_K = T_fine * (T_max - T_min) + T_min
Cp_pred = Cp_pred_norm * (Cp_max - Cp_min) + Cp_min

# ── Plot ───────────────────────────────────────────────────────
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: measured data vs. the fitted curve.
ax1.plot(T_raw, Cp_raw, 'ko', markersize=6, label='NIST data')
ax1.plot(T_fine_K, Cp_pred, 'r-', linewidth=2, label=f'NN ({H} neurons)')
ax1.set_xlabel('Temperature (K)')
ax1.set_ylabel('$C_p$ (kJ/kg/K)')
ax1.set_title('$C_p(T)$ for N$_2$ at 1 bar')
ax1.legend()

# Right panel: loss history on a log scale.
ax2.semilogy(losses)
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Mean Squared Error')
ax2.set_title('Training Loss')

plt.tight_layout()
plt.savefig('nn_fit.png', dpi=150)
plt.show()

print(f"\nFinal loss: {losses[-1]:.6f}")
print(f"Network: {1} input -> {H} hidden (tanh) -> {1} output")
print(f"Total parameters: {W1.size + b1.size + W2.size + b2.size}")