Developing a Deep Learning Application

25 Jul 2020

This project demonstrates development of a Python command-line application that uses a deep neural network to predict the contents of images. The application enables the user to train the network on a set of arbitrary, labeled images and to use that trained network to predict the labels of new images.

During development, the network managed 78.4% accuracy on a dataset consisting of images of 102 different types of flowers.

This is page 3 of 3 for the project, where I:

Convert the Network Prediction Function to a Command-Line Application

import torch
from torch import nn, optim
from torchvision import transforms, datasets, models
import json
import numpy as np
from PIL import Image
import argparse

# Command-line interface: two required positionals (the image and the trained
# checkpoint) plus optional flags controlling the output and the device.
parser = argparse.ArgumentParser(description='Use a trained neural network to make predictions about the contents of images.')

parser.add_argument('image_path',             # Required: image_path
                    help='the path to an image to make a prediction on.')
parser.add_argument('checkpoint',             # Required: checkpoint
                    help='the path to a trained neural network.')
parser.add_argument('--top_k',                # Optional: --top_k
                    help='the number of most likely classes to include.',
                    type=int)
parser.add_argument('--category_names',       # Optional: --category_names
                    help='a json file containing a mapping of categories to real names.')
parser.add_argument('--gpu',                  # Optional: --gpu
                    help='use the gpu instead of CPU for prediction.',
                    action="store_true")

args = parser.parse_args()

image_path = args.image_path
checkpoint = args.checkpoint
# Fall back to 5 classes when --top_k is omitted (or given as 0).
top_k = args.top_k if args.top_k else 5
# Reject negative counts here with a usage message instead of letting the
# tensor top-k call fail later with an opaque error.
if top_k < 1:
    parser.error('--top_k must be a positive integer.')
# An empty string means "no mapping file": raw class numbers are printed.
category_names = args.category_names if args.category_names else ''
gpu = args.gpu

# Echo the effective settings so the user can see what will be run.
print('\n    image_path: ' + image_path)
print('    checkpoint: ' + checkpoint)
print('         top_k: ' + str(top_k))
print('category_names: ' + str(category_names))
print('           gpu: ' + str(gpu) + '\n')

######################### Helper Functions ####################################

def load_model(filepath):
    """Rebuild a VGG-based classifier from a saved checkpoint.

    The checkpoint must be a dict readable by ``torch.load`` with the keys
    'base_arch', 'hidden_units', 'output_units', 'state_dict' (weights of the
    classifier head only) and 'idx_to_class_num'.

    Args:
        filepath: path to the checkpoint file.

    Returns:
        A ``(model, idx_to_class_num)`` tuple. ``model`` is the pretrained
        VGG network with frozen parameters and the restored classifier head,
        located on the CPU. ``idx_to_class_num`` is returned exactly as it
        was stored in the checkpoint.
    """
    # Always deserialize onto the CPU so that a checkpoint saved from a GPU
    # can still be opened on a CPU-only machine; the caller is responsible
    # for moving the model to its target device afterwards.
    checkpoint = torch.load(filepath, map_location='cpu')

    arch = checkpoint['base_arch']
    hidden_units = checkpoint['hidden_units']
    output_units = checkpoint['output_units']
    state_dict = checkpoint['state_dict']
    idx_to_class_num = checkpoint['idx_to_class_num']

    # Supported base architectures; anything unrecognised falls back to
    # vgg11, matching the original behaviour.
    builders = {
        'vgg13': models.vgg13,
        'vgg16': models.vgg16,
        'vgg19': models.vgg19,
    }
    model = builders.get(arch, models.vgg11)(pretrained=True)

    # Freeze the pretrained weights; nothing is trained at prediction time.
    for param in model.parameters():
        param.requires_grad = False

    # Recreate the classifier head with the layer sizes recorded in the
    # checkpoint so the saved state_dict lines up with it.
    model.classifier = nn.Sequential(nn.Linear(25088, hidden_units),
                                     nn.ReLU(),
                                     nn.Dropout(0.2),
                                     nn.Linear(hidden_units, output_units),
                                     nn.LogSoftmax(dim=1))

    model.classifier.load_state_dict(state_dict)

    return model, idx_to_class_num

def process_image(image_path):
    """Load an image file and turn it into a normalized (3, 224, 224) array.

    The image is converted to RGB, resized so its shortest side is 256
    pixels, center-cropped to 224x224, scaled to the 0-1 range, normalized
    per channel and finally transposed to channel-first layout.

    Args:
        image_path: path to the image file to load.

    Returns:
        A float numpy array of shape (3, 224, 224).
    """
    short_side = 256
    center_size = 224

    # Close the file handle deterministically. Forcing RGB guarantees three
    # color channels, so grayscale, palette or RGBA images do not break the
    # per-channel normalization below. convert() returns a fully loaded copy,
    # so it is safe to use after the file is closed.
    with Image.open(image_path) as img:
        rgb_img = img.convert('RGB')

    # Resize Image so shortest side is a specified length. The short side is
    # set exactly; scaling it through a float ratio could truncate it to 255.
    width, height = rgb_img.size
    if width <= height:
        newsize = short_side, int(short_side * height / width)
    else:
        newsize = int(short_side * width / height), short_side
    resized_img = rgb_img.resize(newsize)

    # Crop Image to Center 224x224
    left = int((newsize[0] - center_size) / 2)
    upper = int((newsize[1] - center_size) / 2)
    right = left + center_size
    lower = upper + center_size
    cropped_img = resized_img.crop((left, upper, right, lower))

    # Encode color channels as floats (0-1) instead of integers (0-255)
    np_image = np.array(cropped_img) / 255.

    # Normalize colors
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    normalized_image = (np_image - mean) / std

    # Convert each image from (224, 224, 3) to (3, 224, 224)
    # PyTorch expects the color channel to be first, but is last
    processed_image = normalized_image.transpose((2, 0, 1))

    return processed_image

def predict(image_path, model, topk=5):
    """Predict the most likely classes for a single image.

    Note: the model is switched to evaluation mode as a side effect.

    Args:
        image_path: path to the image file to classify.
        model: network whose output is log-probabilities of shape
            [1, num_classes] (the classifier ends in LogSoftmax).
        topk: how many of the most likely classes to return. Values larger
            than the number of classes are clamped to that number.

    Returns:
        A ``(top_p, top_class)`` tuple of tensors, each of shape [1, k]:
        the probabilities and the corresponding output indices, ordered from
        most to least likely.
    """
    image = torch.from_numpy(process_image(image_path))

    # Run on whichever device the model's weights live on, so the function
    # does not depend on a module-level `device` variable being defined.
    model_device = next(model.parameters()).device

    # Add the batch dimension: [3, 224, 224] -> [1, 3, 224, 224]
    image = image.unsqueeze(0).to(model_device, dtype=torch.float)

    # Evaluation mode disables dropout; without it the predictions are
    # randomly perturbed on every call.
    model.eval()
    # No gradients are needed for inference; skipping them saves memory.
    with torch.no_grad():
        logps = model(image)

    # The network emits log-probabilities; exponentiate to get probabilities.
    ps = torch.exp(logps)

    # Asking for more classes than the model has would raise an error.
    topk = min(topk, ps.shape[1])
    top_p, top_class = ps.topk(topk, dim=1)

    return top_p, top_class

##################### Script Execution Continues ##############################

# Pick the compute device. If the GPU was requested but CUDA is not usable,
# fall back to the CPU with a notice instead of crashing on model.to('cuda').
device = 'cuda' if gpu else 'cpu'
if gpu and not torch.cuda.is_available():
    print('CUDA is not available; falling back to CPU.')
    device = 'cpu'

model, idx_to_class_num = load_model(checkpoint)
model.to(device)

probabilities, classes = predict(image_path, model, top_k)

# Optional mapping from class number to a human-readable name.
class_num_to_name = None
if category_names != '':
    with open(category_names) as f:
        class_num_to_name = json.load(f)

# Translate each predicted output index into a class number and, when a
# mapping file was supplied, into its readable name.
names, probs = [], []
for c, p in zip(classes[0], probabilities[0]):
    class_num = idx_to_class_num[c.item()]
    if class_num_to_name is not None:
        names.append(class_num_to_name[class_num])
    else:
        names.append(class_num)
    probs.append(p.item())

# Report the results, most likely class first.
for name, prob in zip(names, probs):
    print('{:4.1f}% : {}'.format(prob*100., name))