Visualizing Decision Boundaries
Train Test Split
# Import statements
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np
# Import the train test split
from sklearn.model_selection import train_test_split
# Load the dataset. The CSV has no header row, so header=None makes pandas
# label the columns with integers instead of consuming the first data row.
csv_path = 'visualizing-decision-boundaries/data.csv'
df = pd.read_csv(csv_path, header=None)
# Preview the first few rows
df.head()
| | 0 | 1 | 2 |
|---|---|---|---|
| 0 | 0.24539 | 0.81725 | 0 |
| 1 | 0.21774 | 0.76462 | 0 |
| 2 | 0.20161 | 0.69737 | 0 |
| 3 | 0.20161 | 0.58041 | 0 |
| 4 | 0.24770 | 0.49561 | 0 |
# Convert the DataFrame to a plain NumPy array so it can be column-sliced
data = np.asarray(df)
# Assign the features to the variable X, and the labels to the variable y
X = data[:, 0:2]  # first two columns
y = data[:, 2]    # third column
# Use train test split to split your data; the fixed random_state keeps the
# split the same from run to run
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
# Instantiate the model and fit it to the training data
model = DecisionTreeClassifier().fit(X_train, y_train)
# Use the trained model to make predictions
y_pred = model.predict(X_test)
# Score the predictions on the test data. accuracy_score is documented as
# accuracy_score(y_true, y_pred), so the ground-truth labels are passed first.
# NOTE: despite its name, acc_percent holds a fraction in [0, 1]; it is
# multiplied by 100 only when formatted below.
acc_percent = accuracy_score(y_test, y_pred)
# normalize=False yields the number of correct predictions rather than the
# fraction; int() keeps the "(correct/total)" display integral whatever
# numeric type is returned.
acc_numerator = int(accuracy_score(y_test, y_pred, normalize=False))
accuracy_string = '{:2.1f}% ({}/{}) Accurate on the Test Set'.format(
    acc_percent * 100.,
    acc_numerator,
    y_test.shape[0])
print(accuracy_string)
95.8% (23/24) Accurate on the Test Set
Visualize the Decision Boundaries
%matplotlib notebook
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# Spacing of the background prediction grid and marker size for the scatter
mesh_step_size = 0.001
mark_symbol_size = 20
# Two-colour palettes (red / blue): the saturated one colours the scattered
# data points, the pale one colours the predicted regions behind them
cmap_bold = ListedColormap(['#FF1111', '#1111FF'])
cmap_light = ListedColormap(['#FFCCCC', '#CCCCFF'])
def plot_decision_boundaries(Xs, Ys, title, *, clf=None):
    """Draw a classifier's predicted regions with data points on top.

    The classifier is evaluated on a regular grid covering [0, 1) on both
    axes (spacing ``mesh_step_size``); the predictions are drawn as a
    pale background and the supplied points are scattered over it.

    Args:
        Xs: 2-D array of points; column 0 is plotted on the x axis and
            column 1 on the y axis.
        Ys: Per-point values used to colour the scattered points.
        title: Text used as the figure title.
        clf: Optional fitted estimator exposing ``predict``. When omitted,
            the module-level ``model`` is used, which matches the
            function's original behaviour.

    Note:
        The grid range is fixed to the unit square, so points outside
        [0, 1) will fall outside the coloured background.
    """
    if clf is None:
        clf = model  # fall back to the globally trained classifier

    plt.figure()

    # Plot Decision Boundaries: predict a label for every grid node, then
    # fold the flat prediction vector back into the grid's 2-D shape
    grid_axis = np.arange(0, 1, mesh_step_size)
    xx, yy = np.meshgrid(grid_axis, grid_axis)
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

    # Plot Points
    plt.scatter(Xs[:, 0],
                Xs[:, 1],
                s=mark_symbol_size,
                c=Ys,
                cmap=cmap_bold,
                edgecolor='black')

    plt.title(title)

    # Hide the axes frame and keep only the corner tick marks
    for spine in plt.gca().spines.values():
        spine.set_visible(False)
    plt.xticks([0.0, 1.0])
    plt.yticks([0.0, 1.0])
# Show the learned regions together with the points the model was fitted on
plot_decision_boundaries(Xs=X_train, Ys=y_train, title='Training Set');
<IPython.core.display.Javascript object>
# Show the same regions with the held-out points; the accuracy summary
# string doubles as the figure title
plot_decision_boundaries(Xs=X_test, Ys=y_test, title=accuracy_string);
<IPython.core.display.Javascript object>
This content is taken from notes I took while pursuing the Intro to Machine Learning with PyTorch Nanodegree certification.