
I am currently going through the FastAI course and, to practise, I wanted to code a neural network from scratch that classifies the FashionMNIST dataset.

Lately, I've been running into an issue: on some runs (after reinitializing my weights and biases) I get a validation accuracy of about 0.9, but on other runs it is stuck at 0.1 and never improves.

[screenshots: training output when it works vs. when it doesn't]

I have tried starting over from zero multiple times, but I can't figure out the issue. I would very much appreciate any help :)

Below is my code:

#hide
!pip install -Uqq fastbook
import fastbook
fastbook.setup_book()

#hide
from fastai.vision.all import *
from fastbook import *

matplotlib.rc('image', cmap='Greys')

import torchvision.transforms as transforms
from torchvision.transforms import ToTensor, Lambda
from torch.utils.data import DataLoader
from torchvision import datasets
import torch
import matplotlib.pyplot as plt
from torch.nn import CrossEntropyLoss

tfms = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1037,), (0.3081,))])

training_data = datasets.FashionMNIST(
    root = 'data',
    train = True,
    download = True,
    transform = tfms,
)

train_x = training_data.data.view(-1, 28*28).float()/255
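
Note: `tfms` only runs when samples are fetched through the dataset's indexing; reading `training_data.data` directly bypasses it, so only the `/255` scaling above is actually applied. A minimal sketch of applying the same mean/std normalization by hand, reusing the constants from `tfms`:

# optional sketch: apply the Normalize() stats from tfms to the raw tensor
train_x = (train_x - 0.1037) / 0.3081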

df = pd.DataFrame(train_x[0].view(28,28))
df.style.set_properties(**{'font-size': '6pt'}).background_gradient('Greys')

target_transform = Lambda(lambda y: torch.zeros(10, dtype=torch.float).scatter_(0, y, value=1))
train_y = torch.stack([target_transform(y) for y in training_data.targets])
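
For reference, `target_transform` one-hot encodes a class index, e.g.:

target_transform(torch.tensor(3))
# tensor([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.])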

dset = list(zip(train_x, train_y))
dl = DataLoader(dset, batch_size=1024, shuffle=True)

testing_data = datasets.FashionMNIST(
    root='data',
    train= False,
    download= True,
    transform = tfms
)

valid_x = testing_data.data.view(-1, 28*28).float()/255
valid_x.shape

valid_y = torch.stack([target_transform(y) for y in testing_data.targets])
valid_y.shape

valid_dset = list(zip(valid_x, valid_y))
valid_dl = DataLoader(valid_dset, batch_size=1024, shuffle=False)

# Init params

def init_params(size, std=1.0):
  return (torch.randn(size)*std).requires_grad_()

weights = init_params((28*28, 10))
biases = init_params(10, 1)

weights.shape, biases.shape, train_x[0].shape
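
For comparison, a common way to keep the initial logits small is to scale the weight init by 1/sqrt(number of inputs) — a sketch, not what the runs above used:

# sketch: scaled init, std = 1/sqrt(784), so initial logits stay O(1)
weights = init_params((28*28, 10), std=(28*28)**-0.5)
biases = init_params(10, 0.)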

# Predict

def linear1(xb):
  # single linear layer: (batch, 784) @ (784, 10) + (10,) -> (batch, 10) logits
  return xb@weights + biases

# Calculate loss

def cross_entropy_loss(predictions, targets):
  # softmax over the 10 class logits, then negative log-likelihood of the one-hot target
  predictions = predictions.softmax(dim=1)
  cross = -targets*torch.log(predictions)
  return cross.sum(1).mean()

cross_entropy_loss(linear1(train_x[0:40]), train_y[0:40])
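
One caveat with computing the loss this way: `torch.log` of a softmax output underflows to `-inf` once a probability reaches 0, which turns the loss (and the gradients) into `nan`. An equivalent but numerically stabler sketch using `log_softmax`:

def stable_cross_entropy_loss(predictions, targets):
  # log_softmax fuses log(softmax(x)) into one numerically stable op
  log_probs = predictions.log_softmax(dim=1)
  return (-targets * log_probs).sum(1).mean()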

# Calc grad

def calc_grad(model, xb, yb):
  preds = model(xb)
  loss = cross_entropy_loss(preds, yb)
  loss.backward()

# Train epoch

lr = 10
params = weights, biases

def train_epoch(model, params, lr):
  for xb, yb in dl:
    calc_grad(model, xb, yb)
    for p in params:
      p.data -= p.grad * lr
      p.grad.zero_()
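
An equivalent variant of the update step that uses `torch.no_grad()` instead of touching `.data` — a sketch:

def train_epoch_nograd(model, params, lr):
  for xb, yb in dl:
    calc_grad(model, xb, yb)
    with torch.no_grad():  # the SGD step itself should not be tracked by autograd
      for p in params:
        p -= p.grad * lr
        p.grad.zero_()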

def batch_accuracy(xb, yb):
  # predicted class = index of the largest logit; softmax/round are not needed for argmax
  correct = xb.max(1).indices == yb.max(1).indices
  return correct.float().mean()

def validate_epoch(model):
  accs = [batch_accuracy(model(xb), yb) for xb, yb in valid_dl]
  return round(torch.stack(accs).mean().item(), 4)

train_epoch(linear1, params, lr)

validate_epoch(linear1)

for i in range(5):
  train_epoch(linear1, params, lr)
  print(validate_epoch(linear1), end= ' ')

print(linear1(valid_x[0:15]).max(1).indices == valid_y[0:15].max(1).indices)