PyTorch Basics

Today I came across a great talk by Christoph Henkelmann on Autoencoders and wanted to try building the various Autoencoder architectures described in the talk with PyTorch. But first I needed to get familiar with the library, since I've only used Google's TensorFlow in the past.

I found a really helpful tutorial by AssemblyAI and merged it with some content from the PyTorch docs to create this post for quick reference.

Tensors

import torch

# Create a 1-element tensor (torch.empty returns uninitialized values)
torch.empty(1)

# Create a vector (rank 1 tensor)
torch.empty(3)

# Create a matrix (rank 2 tensor)
torch.empty(2, 3)

# Create a 3D tensor
torch.empty(2, 2, 3)

# Create a 4D tensor
torch.empty(2, 2, 2, 3)

# Create a 2x3 tensor with random values between 0 and 1
torch.rand(2, 3)

# Create a 2x3 tensor filled with zeros
torch.zeros(2, 3)

# Create a 2x3 tensor filled with ones
torch.ones(2, 3)

To inspect constructed tensors...

x = torch.rand(2, 3)

# Get the size of the tensor
print(x.size())  # Output: torch.Size([2, 3])

# Get the shape of the tensor (same as size)
print(x.shape)   # Output: torch.Size([2, 3])

# Get the data type of the tensor elements
print(x.dtype)   # Output: torch.float32

# Create a tensor with a specific data type
torch.zeros(5, 2, dtype=torch.float16)

# Create a tensor from a list
torch.tensor([5.5, 3])

# Create a tensor that requires gradient calculations
x = torch.tensor([5.5, 3], requires_grad=True)

Tensor Operations

Arithmetic

x = torch.ones(3, 3)
y = torch.rand(3, 3)
z = x + y # elementwise addition
z = torch.add(x, y)

# methods with a trailing `_` operate in place
x.add_(y)

z = x - y # subtract
z = x * y # multiply
z = x / y # divide

Slicing & Views

To get parts of a tensor:

# Slicing
x = torch.rand(5, 3)

# Get all rows of column 0
x[:, 0]

# Get row 1, all columns
x[1, :]

# Get the element at row 1, column 1
x[1, 1]

# Get the value of the element as a Python number
x[1, 1].item()

The view() method can be used to reshape a tensor into a new shape without changing its underlying data.

# 4x4 matrix with random values drawn from a normal
# distribution with mean 0 and standard deviation 1
x = torch.randn(4, 4)

# reshape into a 1-D tensor with 16 elements
y = x.view(16)

# `-1` tells PyTorch to infer that dimension's size
# from the remaining dimensions
y = x.view(-1, 8) # 2x8
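
A quick check of the "without changing its underlying data" part (my own addition, not from the tutorial): a view shares storage with the original tensor, so writing through one is visible in the other.

x = torch.zeros(4, 4)
y = x.view(16)

# Writing through the view also changes the original tensor
y[0] = 100.0
print(x[0, 0])  # tensor(100.)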

GPU Support

Checking for hardware availability

# Check if a CUDA-capable GPU is available
if torch.cuda.is_available():
  device_label = 'cuda'
else:
  device_label = 'cpu'

# Create a PyTorch device object
device = torch.device(device_label)

# Create a tensor on the specified device
x = torch.rand(2, 2, device=device)

# Move an existing tensor to the specified device
x = torch.rand(2, 2).to(device)
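
To go the other way (a small addition of mine): a tensor has to be back on the CPU before it can be converted to a NumPy array.

# Move the tensor back to the CPU and convert it to a NumPy array
x_cpu = x.cpu()
x_np = x_cpu.numpy()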

Autograd

PyTorch uses a technique called automatic differentiation: calling backward() on a scalar tensor computes the gradient of that tensor with respect to every tensor in its computation graph that has requires_grad=True.

# Create a tensor that requires gradient calculations
x = torch.randn(3, requires_grad=True)

# Perform an operation
y = x + 5

# Check the gradient function associated with y
print(y.grad_fn)  # Output: <AddBackward0 object at ...>

# Calculate the mean of y
z = y.mean()

# Perform backpropagation to calculate gradients
z.backward()

# Print the gradient of x
print(x.grad)  # Output: tensor([0.3333, 0.3333, 0.3333])
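
One thing worth noting (my addition, not part of the tutorial): gradients accumulate across backward() calls, which is why the training loops further down zero them out after every update.

x = torch.ones(3, requires_grad=True)

for _ in range(2):
  y = (2 * x).sum()
  y.backward()

# The gradients from both backward() calls were summed
print(x.grad)  # tensor([4., 4., 4.])

# Reset the gradient before the next iteration
x.grad.zero_()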

Tracking Tensor Gradients

By default, tensors are not tracked, so operations on them don't record a gradient function. Enabling requires_grad in place makes subsequent operations on x record one.

x = torch.randn(2, 2)
y = (x * x).sum()

# x isn't tracked, so y has no gradient function
print(x.requires_grad, y.grad_fn)  # False None

# Enable tracking in place with the `requires_grad_` method:
x.requires_grad_(True)

# Recompute y; its gradient function is now associated with x
y = (x * x).sum()
print(x.requires_grad, y.grad_fn)  # True <SumBackward0 object at ...>

To get a version of x that is detached from the computation graph (no gradient tracking):

# detach() returns a new tensor that shares the same data
# but does not track gradients
y = x.detach()
print(x.requires_grad, y.requires_grad)
# True False

# Disable gradient tracking with a `torch.no_grad()` block
x = torch.randn(2, 2, requires_grad=True)
print(x.requires_grad) # True

with torch.no_grad():
  y = x ** 2
  print(y.requires_grad) # False

Gradient Descent

Here's how to implement a simple gradient descent algorithm:

# Input and target data
X = torch.tensor([1, 2, 3])
Y = torch.tensor([2, 4, 6])

# Initialize the weight parameter
w = torch.tensor(0, dtype=torch.float32, requires_grad=True)

# Define the forward pass (linear model)
def forward(x):
  return w * x

# Define the loss function (mean squared error)
def loss(y, y_pred):
  return ((y_pred - y) ** 2).mean()

# Test input
X_test = 5.0

# Prediction before training
print(f'Prediction before training: f({X_test}) = {forward(X_test).item():.3f}')

# Training parameters
lr = 0.01  # Learning rate
n_epochs = 100

# Gradient Descent Loop
for epoch in range(n_epochs):
    # Forward pass
    y_pred = forward(X)

    # Calculate the loss
    l = loss(Y, y_pred)

    # Calculate gradients
    l.backward()

    # Update the weight parameter
    with torch.no_grad():
        w -= lr * w.grad

        # Zero out the gradients
        w.grad.zero_()

    # Print progress
    if (epoch + 1) % 10 == 0:
        print(f'epoch {epoch + 1}: w = {w.item():.3f}, loss = {l.item():.3f}')

# Prediction after training
print(f'Prediction after training: f({X_test}) = {forward(X_test).item():.3f}')

Model, Loss Function, & Optimizer

PyTorch provides a more streamlined way to define and train models using the nn.Module class, loss functions from torch.nn, and optimizers from torch.optim:

import torch.nn as nn

# Define the training data (input and target)
X = torch.tensor([[1], [2], [3], [4], [5], [6], [7], [8]], dtype=torch.float32)
Y = torch.tensor([[2], [4], [6], [8], [10], [12], [14], [16]], dtype=torch.float32)

# Determine the number of samples and features
n_samples, n_features = X.shape
print(f'n_samples = {n_samples}, n_features = {n_features}')

# Define a test input
X_test = torch.tensor([5], dtype=torch.float32)

# Define the Linear Regression Model
class LinearRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        # Define the linear layer
        self.lin = nn.Linear(input_dim, output_dim, dtype=torch.float32)

    def forward(self, x):
        # Define the forward pass
        return self.lin(x)

# Instantiate the model
input_size, output_size = n_features, n_features
model = LinearRegression(input_size, output_size)

# Prediction before training
print(f'Prediction before training: f({X_test.item()}) = {model(X_test).item():.3f}')

# Training parameters
learning_rate = 0.01
n_epochs = 100

# Define the loss function (Mean Squared Error)
loss = nn.MSELoss()

# Define the optimizer (Stochastic Gradient Descent)
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Access the model parameters
params = model.parameters()
print(list(params))

# Training loop
for epoch in range(n_epochs):
    # Forward pass: Make predictions
    y_predicted = model(X)

    # Calculate the loss
    l = loss(y_predicted, Y)

    # Backward pass: Calculate gradients
    l.backward()

    # Update the weights using the optimizer
    optimizer.step()

    # Zero out the gradients for the next iteration
    optimizer.zero_grad()

    # Print progress every 10 epochs
    if (epoch + 1) % 10 == 0:
        w, b = model.parameters() # Get the weights and bias
        print(f'epoch {epoch + 1}: w = {w[0][0].item():.3f}, loss = {l.item():.3f}')

# Prediction after training
print(f'Prediction after training: f({X_test.item()}) = {model(X_test).item():.3f}')

Neural Net

This example demonstrates training a neural network on the MNIST dataset, which consists of handwritten digits.

Definition

Define a NeuralNet using the nn.Module class:

import torch.nn as nn

class NeuralNet(nn.Module):
  def __init__(self, input_size, hidden_size, num_classes):
    super(NeuralNet, self).__init__()
    self.l1 = nn.Linear(input_size, hidden_size)
    self.relu = nn.ReLU()
    self.l2 = nn.Linear(hidden_size, num_classes)

  def forward(self, x):
    out = self.l1(x)
    out = self.relu(out)
    out = self.l2(out)
    return out
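
As a quick sanity check (my own addition, with made-up sizes), a dummy batch of flattened 28x28 images should produce one score per class:

net = NeuralNet(input_size=784, hidden_size=500, num_classes=10)

# A batch of 4 flattened 28x28 images
dummy = torch.rand(4, 784)
print(net(dummy).shape)  # torch.Size([4, 10])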

Dataset

Download the MNIST dataset using built-in PyTorch functions.

import torch
import torchvision
import torchvision.transforms as transforms

batch_size = 100

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='./data',
                                           train=True,
                                           transform=transforms.ToTensor(),
                                           download=True)

test_dataset = torchvision.datasets.MNIST(root='./data',
                                          train=False,
                                          transform=transforms.ToTensor())

# Create data loaders
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=True)

examples = iter(test_loader)
example_data, example_targets = next(examples)

Hyperparameters & Training Loop

Specify the hyperparameters and run a training loop to train the model.

# Hyperparameters
input_size = 784 # 28x28 image size
hidden_size = 500
num_classes = 10
num_epochs = 2
learning_rate = 0.001

model = NeuralNet(input_size, hidden_size, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
n_total_steps = len(train_loader) # 600

for epoch in range(num_epochs):
  for i, (images, labels) in enumerate(train_loader):
    # origin shape: [100, 1, 28, 28]
    # resized: [100, 784]
    images = images.reshape(-1, 28 * 28).to(device)
    labels = labels.to(device)

    # Forward pass and loss calculation
    outputs = model(images)
    loss = criterion(outputs, labels)

    # Backward and optimize
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    if (i+1) % 100 == 0:
      print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{n_total_steps}], Loss: {loss.item():.4f}')

Evaluate the model

with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 28*28).to(device)
        labels = labels.to(device)
        outputs = model(images)
        # max returns (value, index)
        _, predicted = torch.max(outputs.data, 1)
        n_samples += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on the 10000 test images: {acc} %')

Visualization

ProTip: It's possible to plot the images with matplotlib.pyplot while they're still tensors.

import matplotlib.pyplot as plt

examples = iter(test_loader)
example_data, example_targets = next(examples)

for i in range(6):
    plt.subplot(2,3,i+1)
    plt.imshow(example_data[i][0], cmap='gray')
plt.show()

Convolutional Neural Net

This example demonstrates training a CNN on the CIFAR10 dataset, which consists of color images in 10 classes.

Definition

The network below is a simple convolutional neural network (CNN) designed for image classification tasks.

In the forward pass, the convolution, pooling, and fully connected layers defined in __init__ are applied in sequence to turn the input image into a vector of class scores.

import torch.nn as nn
import torch.nn.functional as F

class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 64, 3)
        self.conv3 = nn.Conv2d(64, 64, 3)
        self.fc1 = nn.Linear(64 * 4 * 4, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        # N, 3, 32, 32
        x = F.relu(self.conv1(x))  # -> N, 32, 30, 30
        x = self.pool(x)           # -> N, 32, 15, 15
        x = F.relu(self.conv2(x))  # -> N, 64, 13, 13
        x = self.pool(x)           # -> N, 64, 6, 6
        x = F.relu(self.conv3(x))  # -> N, 64, 4, 4
        x = torch.flatten(x, 1)     # -> N, 1024
        x = F.relu(self.fc1(x))     # -> N, 64
        x = self.fc2(x)             # -> N, 10
        return x
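
The 64 * 4 * 4 input size of fc1 comes from the shape trace in the comments above. A quick check with a dummy CIFAR-sized batch (my addition) confirms the output is one score per class:

net = ConvNet()

# A batch of 4 CIFAR-sized images: 3 channels, 32x32 pixels
dummy = torch.rand(4, 3, 32, 32)
print(net(dummy).shape)  # torch.Size([4, 10])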

Dataset

Download the CIFAR10 dataset using built-in PyTorch functions.

import torch
import torchvision
import torchvision.transforms as transforms

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Define transformations for the dataset
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Load the CIFAR10 dataset
train_dataset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)

test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

# Create data loaders (batch_size needs to be defined before it's used here)
batch_size = 32
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
                                         shuffle=False)

# Define the classes in CIFAR10
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Hyperparameters & Training Loop

Specify the hyperparameters and run a training loop to train the model.

# Hyperparameters
num_epochs = 10
learning_rate = 0.001

# Instantiate the model and move to the device
model = ConvNet().to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
n_total_steps = len(train_loader)
for epoch in range(num_epochs):
    running_loss = 0.0

    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        running_loss += loss.item()

    print(f'[{epoch + 1}] loss: {running_loss / n_total_steps:.3f}')

print('Finished Training')

# Save the trained model
PATH = './cnn.pth'
torch.save(model.state_dict(), PATH)

Evaluate the model

# Load the model
loaded_model = ConvNet()
loaded_model.load_state_dict(torch.load(PATH))
loaded_model.to(device)
loaded_model.eval()

# Evaluate the trained model
with torch.no_grad():
    n_correct = 0
    n_correct2 = 0
    n_samples = len(test_loader.dataset)

    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)

        # Get predicted class
        _, predicted = torch.max(outputs, 1)
        n_correct += (predicted == labels).sum().item()

        # Evaluate the loaded model
        outputs2 = loaded_model(images)
        _, predicted2 = torch.max(outputs2, 1)
        n_correct2 += (predicted2 == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the model: {acc} %')

    acc = 100.0 * n_correct2 / n_samples
    print(f'Accuracy of the loaded model: {acc} %')

Visualization

Plot using matplotlib with some help from numpy.

import matplotlib.pyplot as plt
import numpy as np

# Function to display an image
def imshow(img):
    img = img / 2 + 0.5  # Unnormalize
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))
    plt.show()

# Display an image from the dataset (optional)
dataiter = iter(train_loader)
images, labels = next(dataiter)
img_grid = torchvision.utils.make_grid(images[0:25], nrow=5)
imshow(img_grid)
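
The classes tuple from the dataset section maps the numeric labels to readable names, e.g. for the images in the grid above (a small addition of mine):

# Print the class names for the images shown in the grid
print([classes[label.item()] for label in labels[:25]])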

Helpful Resources