Developing a Deep Learning Application
This project demonstrates development of a Python command-line application that uses a deep neural network to predict the contents of images. The application enables the user to train the network on a set of arbitrary, labeled images and to use that trained network to predict the labels of new images.
During development, the network managed 78.4% accuracy on a dataset consisting of images of 102 different types of flowers.
This is page 3 of 3 for the project, where I:
- Convert the Network Prediction Function to a Command-Line Application
import torch
from torch import nn, optim
from torchvision import transforms, datasets, models
import json
import numpy as np
from PIL import Image
import argparse
# Command-line interface: two required positional paths plus optional
# prediction settings. Defaults are declared on the parser so that they
# also show up consistently in one place.
parser = argparse.ArgumentParser(description='Use a trained neural network to make predictions about the contents of images.')
parser.add_argument('image_path',  # Required: image to classify
                    help='the path to an image to make a prediction on.')
parser.add_argument('checkpoint',  # Required: saved network checkpoint
                    help='the path to a trained neural network.')
parser.add_argument('--top_k',  # Optional: how many classes to report
                    help='the number of most likely classes to include.',
                    type=int,
                    default=5)
parser.add_argument('--category_names',  # Optional: category -> name mapping
                    help='a json file containing a mapping of categories to real names.',
                    default='')
parser.add_argument('--gpu',  # Optional: run the prediction on the GPU
                    help='use the gpu instead of CPU for inference.',
                    action="store_true")
args = parser.parse_args()

# A negative count can never be satisfied by topk(); report it as a usage
# error here instead of failing later inside the network code.
if args.top_k < 0:
    parser.error('--top_k must be a positive integer.')

image_path = args.image_path
checkpoint = args.checkpoint
# An explicit 0 falls back to the default of 5, as it always has.
top_k = args.top_k or 5
# The empty string means "no mapping file supplied"; later code tests for it.
category_names = args.category_names or ''
gpu = args.gpu

# Echo the effective settings so the user can see what will be run.
print('\n image_path: ' + image_path)
print(' checkpoint: ' + checkpoint)
print(' top_k: ' + str(top_k))
print('category_names: ' + str(category_names))
print(' gpu: ' + str(gpu) + '\n')
######################### Helper Functions ####################################
def load_model(filepath):
    """Rebuild a VGG-based classifier from a saved checkpoint.

    Args:
        filepath: Path to a checkpoint readable by ``torch.load``. The
            checkpoint must contain the keys ``base_arch``,
            ``hidden_units``, ``output_units``, ``state_dict`` (weights of
            the classifier head only) and ``idx_to_class_num``.

    Returns:
        A ``(model, idx_to_class_num)`` tuple: the rebuilt network, switched
        to evaluation mode, and the index-to-class mapping exactly as it was
        stored in the checkpoint.
    """
    # NOTE(review): torch.load unpickles the file, so only open checkpoints
    # that come from a trusted source.
    # map_location='cpu' lets a checkpoint that was saved from a GPU be
    # loaded on a machine without CUDA; the caller moves the model afterwards.
    saved = torch.load(filepath, map_location='cpu')

    arch = saved['base_arch']
    hidden_units = saved['hidden_units']
    output_units = saved['output_units']

    # Any architecture name other than the three listed falls back to vgg11.
    builders = {
        'vgg13': models.vgg13,
        'vgg16': models.vgg16,
        'vgg19': models.vgg19,
    }
    build = builders.get(arch, models.vgg11)
    # The checkpoint only stores the classifier head, so the feature
    # extractor has to come from the pretrained weights.
    model = build(pretrained=True)

    # Freeze the pretrained layers; the replacement head below is created
    # afterwards and is therefore not affected by this loop.
    for param in model.parameters():
        param.requires_grad = False

    model.classifier = nn.Sequential(
        nn.Linear(25088, hidden_units),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(hidden_units, output_units),
        nn.LogSoftmax(dim=1),
    )
    model.classifier.load_state_dict(saved['state_dict'])

    # Modules start in training mode, which keeps Dropout active and makes
    # predictions random; switch to evaluation mode for inference.
    model.eval()

    return model, saved['idx_to_class_num']
def process_image(image_path):
    """Load an image file and turn it into a normalized network input.

    The image is converted to RGB, resized so that its shorter side is 256
    pixels, center-cropped to 224x224, scaled to the 0-1 range, normalized
    per channel and reordered to channel-first layout.

    Args:
        image_path: Path to an image file that PIL can open.

    Returns:
        A NumPy float array of shape (3, 224, 224).
    """
    short_side = 256
    center_size = 224

    # The context manager closes the underlying file. convert('RGB')
    # guarantees exactly three channels, so grayscale, palette and RGBA
    # images do not break the per-channel normalization below.
    with Image.open(image_path) as img:
        rgb_img = img.convert('RGB')

    # Resize so the shorter side is exactly `short_side`. Pinning that side
    # (instead of multiplying by a float ratio and truncating) avoids ending
    # up one pixel short because of floating-point rounding.
    width, height = rgb_img.size
    if width <= height:
        newsize = (short_side, int(height * short_side / width))
    else:
        newsize = (int(width * short_side / height), short_side)
    resized_img = rgb_img.resize(newsize)

    # Crop the central center_size x center_size region.
    left = (newsize[0] - center_size) // 2
    upper = (newsize[1] - center_size) // 2
    cropped_img = resized_img.crop(
        (left, upper, left + center_size, upper + center_size))

    # Encode color channels as floats (0-1) instead of integers (0-255).
    np_image = np.array(cropped_img) / 255.

    # Normalize each color channel with the fixed mean and standard deviation.
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    normalized_image = (np_image - mean) / std

    # PyTorch expects the color channel first, but it is last here:
    # (224, 224, 3) -> (3, 224, 224).
    return normalized_image.transpose((2, 0, 1))
def predict(image_path, model, topk=5):
    """Predict the most likely classes for a single image.

    Args:
        image_path: Path to the image to classify.
        model: Network whose forward pass returns log-probabilities with
            one row per input image.
        topk: Number of classes to return. Values larger than the number of
            classes the model outputs are clamped to that number.

    Returns:
        A ``(top_p, top_class)`` tuple of tensors, each with one row: the
        highest probabilities and the matching class indices.
    """
    image = torch.from_numpy(process_image(image_path))
    # Add the batch dimension: [3, H, W] -> [1, 3, H, W].
    image = image.unsqueeze(0)

    # Run on whatever device the model lives on instead of depending on a
    # module-level `device` variable being defined before the call.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        target_device = first_param.device
    else:
        target_device = torch.device('cpu')
    image = image.to(target_device, dtype=torch.float)

    # Evaluation mode disables Dropout so the result is deterministic, and
    # no_grad skips gradient bookkeeping that inference does not need. The
    # model's previous mode is restored afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            logps = model(image)
    finally:
        model.train(was_training)

    # The network emits log-probabilities; exponentiate to get probabilities.
    ps = torch.exp(logps)

    # Asking for more classes than exist would make topk() raise.
    k = min(topk, ps.shape[1])
    top_p, top_class = ps.topk(k, dim=1)
    return top_p, top_class
##################### Script Execution Continues ##############################
# Fail early with a readable message rather than an obscure error raised
# while moving the model to a device that does not exist.
if gpu and not torch.cuda.is_available():
    raise SystemExit('--gpu was requested, but CUDA is not available on this machine.')
device = 'cuda' if gpu else 'cpu'

model, idx_to_class_num = load_model(checkpoint)
model.to(device)
probabilities, classes = predict(image_path, model, top_k)

# Load the optional category -> name mapping once, before the loop.
class_num_to_name = None
if category_names != '':
    with open(category_names, encoding='utf-8') as f:
        class_num_to_name = json.load(f)

# Translate each predicted index into a class number and, when a mapping
# file was supplied, into a human-readable name.
names, probs = [], []
for c, p in zip(classes[0], probabilities[0]):
    class_num = idx_to_class_num[c.item()]
    if class_num_to_name is not None:
        # JSON object keys are always strings, so look the class up by its
        # string form; fall back to the class number if it has no name.
        names.append(class_num_to_name.get(str(class_num), class_num))
    else:
        names.append(class_num)
    probs.append(p.item())

for name, prob in zip(names, probs):
    print('{:4.1f}% : {}'.format(prob*100., name))