Today I came across a great talk by Christoph Henkelmann on Autoencoders and I wanted to try building the various Autoencoder architectures described in the talk with PyTorch. But first I need to get familiar with the library since I've only used Google's TensorFlow in the past.
I found a really helpful tutorial by AssemblyAI and merged it with some content from the PyTorch docs to create this post for quick reference.
import torch
# Create a 1-element tensor (`empty` leaves the values uninitialized)
torch.empty(1)
# Create a vector (rank 1 tensor)
torch.empty(3)
# Create a matrix (rank 2 tensor)
torch.empty(2, 3)
# Create a 3D tensor
torch.empty(2, 2, 3)
# Create a 4D tensor
torch.empty(2, 2, 2, 3)
# Create a 2x3 tensor with random values between 0 and 1
torch.rand(2, 3)
# Create a 2x3 tensor filled with zeros
torch.zeros(2, 3)
# Create a 2x3 tensor filled with ones
torch.ones(2, 3)
To inspect constructed tensors...
x = torch.rand(2, 3)
# Get the size of the tensor
print(x.size()) # Output: torch.Size([2, 3])
# Get the shape of the tensor (same as size)
print(x.shape) # Output: torch.Size([2, 3])
# Get the data type of the tensor elements
print(x.dtype) # Output: torch.float32
# Create a tensor with a specific data type
torch.zeros(5, 2, dtype=torch.float16)
# Create a tensor from a list
torch.tensor([5.5, 3])
# Create a tensor that requires gradient calculations
x = torch.tensor([5.5, 3], requires_grad=True)
# Arithmetic operations
x = torch.ones(3, 3)
y = torch.rand(3, 3)
# Element-wise addition
z = x + y
z = torch.add(x, y)
# In-place operations are suffixed with `_` (this modifies x directly)
x.add_(y)
# Subtraction, multiplication, and division work the same way
z = x - y
z = x * y
z = x / y
To get parts of a tensor, use slicing:
# Slicing
x = torch.rand(5, 3)
# Get all rows of column 0
x[:, 0]
# Get row 1, all columns
x[1, :]
# Get the element at row 1, column 1
x[1, 1]
# Get the value of the element as a Python number
x[1, 1].item()
The view() method can be used to reshape a tensor into a new shape without changing its underlying data.
# 4x4 matrix with random values drawn from a normal
# distribution with mean 0 and standard deviation 1
x = torch.randn(4, 4)
# reshapes as 1x16
y = x.view(16)
# The `-1` tells PyTorch to infer the size of the first
# dimension from the specified second dimension
y = x.view(-1, 8) # 2x8
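One caveat worth noting (my addition, not from the tutorial): view() only works when the tensor's memory is contiguous, while reshape() returns a view when possible and copies the data otherwise. A minimal sketch:
x = torch.randn(4, 4)
t = x.t()  # the transpose is a non-contiguous view
# t.view(16) would raise a RuntimeError here
y = t.reshape(16)  # reshape copies when a view isn't possible
print(t.is_contiguous(), y.shape)  # False torch.Size([16])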
Checking for hardware availability
# Check if a CUDA-capable GPU is available
if torch.cuda.is_available():
    device_label = 'cuda'
else:
    device_label = 'cpu'
# Create a PyTorch device object
device = torch.device(device_label)
# Create a tensor on the specified device
x = torch.rand(2, 2, device=device)
# Move an existing tensor to the specified device
x = torch.rand(2, 2).to(device)
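A related gotcha: numpy() only works on CPU tensors, so a tensor that lives on the GPU has to be moved back before converting:
# Calling .numpy() on a CUDA tensor raises a TypeError,
# so move the tensor back to the CPU first
x = torch.rand(2, 2, device=device)
x_np = x.cpu().numpy()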
PyTorch uses a technique called automatic differentiation. Calling backward() on a (scalar) tensor computes the gradients of that tensor with respect to every tensor in its history that has requires_grad=True.
# Create a tensor that requires gradient calculations
x = torch.randn(3, requires_grad=True)
# Perform an operation
y = x + 5
# Check the gradient function associated with y
print(y.grad_fn) # Output: <AddBackward0 object at ...>
# Calculate the mean of y
z = y.mean()
# Perform backpropagation to calculate gradients
z.backward()
# Print the gradient of x
print(x.grad) # Output: tensor([0.3333, 0.3333, 0.3333])
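backward() can be called without arguments only on a scalar. For a non-scalar tensor you pass a vector to compute a vector-Jacobian product instead (a small sketch I've added for completeness):
x = torch.randn(3, requires_grad=True)
y = x * 2
# y is a vector, so backward() needs a gradient argument
v = torch.tensor([0.1, 1.0, 0.0001])
y.backward(v)
print(x.grad)  # Output: tensor([2.0000e-01, 2.0000e+00, 2.0000e-04])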
By default, operations on tensors are not tracked. In the example below, y initially has no gradient function because x doesn't require gradients:
x = torch.randn(2, 2)
y = (x * x).sum()
# y has no gradient function because x isn't tracked
print(x.requires_grad, y.grad_fn)  # False None
# Enable tracking with `requires_grad_` and recompute y:
x.requires_grad_(True)
y = (x * x).sum()
# y's gradient function is now associated with x
print(x.requires_grad, y.grad_fn)  # True <SumBackward0 object at ...>
To detach x from the computation graph and disable gradient tracking:
# detach() returns a new tensor that shares the same data
# but is detached from the graph (no gradient tracking)
y = x.detach()
print(x.requires_grad, y.requires_grad)  # True False
Gradient tracking can also be disabled temporarily with `torch.no_grad()`:
x = torch.randn(2, 2, requires_grad=True)
print(x.requires_grad)  # True
with torch.no_grad():
    y = x ** 2
    print(y.requires_grad)  # False
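Another detail that matters for the training loops below: gradients accumulate across backward() calls, so they have to be zeroed between iterations. A small demonstration:
x = torch.ones(2, requires_grad=True)
for _ in range(3):
    y = (x * 2).sum()
    y.backward()
print(x.grad)  # tensor([6., 6.]) -- each backward() call added 2
x.grad.zero_()  # reset before the next pass
print(x.grad)  # tensor([0., 0.])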
Here's how to implement a simple gradient descent algorithm:
# Input and target data
X = torch.tensor([1, 2, 3])
Y = torch.tensor([2, 4, 6])
# Initialize the weight parameter
w = torch.tensor(0, dtype=torch.float32, requires_grad=True)
# Define the forward pass (linear model)
def forward(x):
    return w * x
# Define the loss function (mean squared error)
def loss(y, y_pred):
    return ((y_pred - y) ** 2).mean()
# Test input
X_test = 5.0
# Prediction before training
print(f'Prediction before training: f({X_test}) = {forward(X_test).item():.3f}')
# Training parameters
lr = 0.01  # Learning rate
n_epochs = 100
# Gradient Descent Loop
for epoch in range(n_epochs):
    # Forward pass
    y_pred = forward(X)
    # Calculate the loss
    l = loss(Y, y_pred)
    # Calculate gradients
    l.backward()
    # Update the weight parameter (outside of gradient tracking)
    with torch.no_grad():
        w -= lr * w.grad
    # Zero out the gradients
    w.grad.zero_()
    # Print progress
    if (epoch + 1) % 10 == 0:
        print(f'epoch {epoch + 1}: w = {w.item():.3f}, loss = {l.item():.3f}')
# Prediction after training
print(f'Prediction after training: f({X_test}) = {forward(X_test).item():.3f}')
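As a sanity check (my own addition), the gradient autograd computes for this loss can be verified against the analytic formula dl/dw = (2/N) * sum(x_i * (w * x_i - y_i)):
x = torch.tensor([1.0, 2.0, 3.0])
y = torch.tensor([2.0, 4.0, 6.0])
w = torch.tensor(0.0, requires_grad=True)
l = ((w * x - y) ** 2).mean()
l.backward()
# Analytic gradient of the MSE loss at w = 0
manual = (2 * x * (w.detach() * x - y)).mean()
print(w.grad, manual)  # tensor(-18.6667) tensor(-18.6667)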
PyTorch provides a more streamlined way to define and train models using the nn.Module class, loss functions from torch.nn, and optimizers from torch.optim:
import torch.nn as nn
# Define the training data (input and target)
X = torch.tensor([[1], [2], [3], [4], [5], [6], [7], [8]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8], [10], [12], [14], [16]], dtype=torch.float32)
# Determine the number of samples and features
n_samples, n_features = X.shape
print(f'n_samples = {n_samples}, n_features = {n_features}')
# Define a test input
X_test = torch.tensor([5], dtype=torch.float32)
# Define the Linear Regression Model
class LinearRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        # Define the linear layer
        self.lin = nn.Linear(input_dim, output_dim, dtype=torch.float32)
    def forward(self, x):
        # Define the forward pass
        return self.lin(x)
# Instantiate the model
input_size, output_size = n_features, n_features
model = LinearRegression(input_size, output_size)
# Prediction before training
print(f'Prediction before training: f({X_test.item()}) = {model(X_test).item():.3f}')
# Training parameters
learning_rate = 0.01
n_epochs = 100
# Define the loss function (Mean Squared Error)
loss = nn.MSELoss()
# Define the optimizer (Stochastic Gradient Descent)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
# Access the model parameters
params = model.parameters()
print(list(params))
# Training loop
for epoch in range(n_epochs):
    # Forward pass: Make predictions
    y_predicted = model(X)
    # Calculate the loss
    l = loss(Y, y_predicted)
    # Backward pass: Calculate gradients
    l.backward()
    # Update the weights using the optimizer
    optimizer.step()
    # Zero out the gradients for the next iteration
    optimizer.zero_grad()
    # Print progress every 10 epochs
    if (epoch + 1) % 10 == 0:
        w, b = model.parameters()  # Get the weights and bias
        print(f'epoch {epoch + 1}: w = {w[0][0].item():.3f}, loss = {l.item():.3f}')
# Prediction after training
print(f'Prediction after training: f({X_test.item()}) = {model(X_test).item():.3f}')
This example demonstrates training a neural network on the MNIST dataset, which consists of handwritten digits.
Define a NeuralNet using the nn.Module class:
import torch.nn as nn
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.l2 = nn.Linear(hidden_size, num_classes)
    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        return out
Download the MNIST dataset using built-in PyTorch functions.
import torch
import torchvision
import torchvision.transforms as transforms
batch_size = 100
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Load MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)
test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transforms.ToTensor())
# Create data loaders
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=True)
examples = iter(test_loader)
example_data, example_targets = next(examples)
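It's worth sanity-checking the batch shapes before training; with batch_size = 100, each MNIST batch holds 100 grayscale 28x28 images:
print(example_data.shape)     # torch.Size([100, 1, 28, 28])
print(example_targets.shape)  # torch.Size([100])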
Specify hyperparameters and run a training loop to train the model.
# Hyperparameters
input_size = 784 # 28x28 image size
hidden_size = 500
num_classes = 10
num_epochs = 2
learning_rate = 0.001
model = NeuralNet(input_size, hidden_size, num_classes).to(device)
# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model
n_total_steps = len(train_loader)  # 600 batches (60,000 images / batch size 100)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # origin shape: [100, 1, 28, 28]
        # resized: [100, 784]
        images = images.reshape(-1, 28 * 28).to(device)
        labels = labels.to(device)
        # Forward pass and loss calculation
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward and optimize
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        if (i + 1) % 100 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{n_total_steps}], Loss: {loss.item():.4f}')
# Evaluate the model on the test set
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28 * 28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # max returns (value, index)
        _, predicted = torch.max(outputs.data, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()
    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on the 10000 test images: {acc} %')
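Note: this particular model has no dropout or batch-norm layers, so it behaves identically either way, but it's good practice to switch a model into evaluation mode before testing:
model.eval()   # disables dropout, uses running stats for batch norm
# ... run the evaluation ...
model.train()  # switch back before further training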
ProTip: It's possible to plot the images with matplotlib.pyplot even while they're still tensors.
import matplotlib.pyplot as plt
examples = iter(test_loader)
example_data, example_targets = next(examples)
for i in range(6):
    plt.subplot(2, 3, i + 1)
    plt.imshow(example_data[i][0], cmap='gray')
plt.show()
This example demonstrates training a CNN on the CIFAR10 dataset, which consists of color images in 10 classes.
The neural network below is a simple convolutional neural network (CNN) designed for image classification tasks.
The first convolutional layer takes an input with 3 channels (e.g., an RGB image), applies 32 filters, each of size 3x3.
The first max-pooling layer reduces the spatial dimensions by a factor of 2 (both width and height).
The second convolutional layer takes the 32-channel input from the previous layer and applies 64 filters of size 3x3.
The third convolutional layer takes the 64-channel input from the previous layer and applies another set of 64 filters of size 3x3.
The first fully connected layer takes the flattened input from the convolutional layers and outputs 64 units.
The second fully connected layer takes the 64-unit input and outputs 10 units, corresponding to the number of classes in the classification task.
In the forward pass, each of the layers above is applied in turn, mapping the input image to 10 class scores.
import torch.nn as nn
import torch.nn.functional as F
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv3 = nn.Conv2d(64, 64, 3)
        self.fc1 = nn.Linear(64 * 4 * 4, 64)
        self.fc2 = nn.Linear(64, 10)
    def forward(self, x):
        # N, 3, 32, 32
        x = F.relu(self.conv1(x))  # -> N, 32, 30, 30
        x = self.pool(x)           # -> N, 32, 15, 15
        x = F.relu(self.conv2(x))  # -> N, 64, 13, 13
        x = self.pool(x)           # -> N, 64, 6, 6
        x = F.relu(self.conv3(x))  # -> N, 64, 4, 4
        x = torch.flatten(x, 1)    # -> N, 1024
        x = F.relu(self.fc1(x))    # -> N, 64
        x = self.fc2(x)            # -> N, 10
        return x
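A quick way to verify the shape comments above is to push a dummy batch through the model (a sanity check, not part of the original tutorial):
net = ConvNet()
dummy = torch.randn(1, 3, 32, 32)  # one fake CIFAR10-sized image
print(net(dummy).shape)  # torch.Size([1, 10])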
Download the CIFAR10 dataset using built-in PyTorch functions.
import torch
import torchvision
import torchvision.transforms as transforms
# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Define transformations for the dataset
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# Load the CIFAR10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=True, transform=transform)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                            download=True, transform=transform)
# Create data loaders (batch_size has to be defined before it's used here)
batch_size = 32
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
                                          shuffle=False)
# Define the classes in CIFAR10
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
Specify hyperparameters and run a training loop to train the model.
# Hyperparameters (batch_size was defined above, before the data loaders)
num_epochs = 10
learning_rate = 0.001
# Instantiate the model and move to the device
model = ConvNet().to(device)
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Train the model
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward and optimize
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        running_loss += loss.item()
    print(f'[{epoch + 1}] loss: {running_loss / n_total_steps:.3f}')
print('Finished Training')
# Save the trained model
PATH = './cnn.pth'
torch.save(model.state_dict(), PATH)
# Load the model
loaded_model = ConvNet()
loaded_model.load_state_dict(torch.load(PATH))
loaded_model.to(device)
loaded_model.eval()
# Evaluate the trained model
with torch.no_grad():
    n_correct = 0
    n_correct2 = 0
    n_samples = len(test_loader.dataset)
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Get predicted class (max returns (value, index))
        _, predicted = torch.max(outputs, 1)
        n_correct += (predicted == labels).sum().item()
        # Evaluate the loaded model on the same batch
        outputs2 = loaded_model(images)
        _, predicted2 = torch.max(outputs2, 1)
        n_correct2 += (predicted2 == labels).sum().item()
    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the model: {acc} %')
    acc = 100.0 * n_correct2 / n_samples
    print(f'Accuracy of the loaded model: {acc} %')
Plot using matplotlib with some help from numpy.
import matplotlib.pyplot as plt
import numpy as np
# Function to display an image
def imshow(img):
    img = img / 2 + 0.5  # Unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()
# Display an image from the dataset (optional)
dataiter = iter(train_loader)
images, labels = next(dataiter)
img_grid = torchvision.utils.make_grid(images[0:25], nrow=5)
imshow(img_grid)