This notebook shows how to plot cross-validation scores as you vary the maximum depth of a decision tree classifier, and how to prune the tree at an appropriate depth to avoid overfitting.
Load libraries and data
We first load the required Python libraries: NumPy, Matplotlib, and scikit-learn.
The Iris dataset is a popular dataset for classification tasks. It contains 150 samples, 50 from each of three Iris species (Setosa, Versicolor, Virginica), with four features per sample: sepal length, sepal width, petal length, and petal width.
import warnings

import matplotlib.pyplot as plt
import numpy as np
from sklearn import tree
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

warnings.filterwarnings('ignore')
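As a quick sanity check on the loaded data (a small addition, not in the original notebook), you can confirm the shapes and class balance described above:

iris = load_iris()
print(iris.data.shape)           # (150, 4): 150 samples, 4 features
print(iris.target_names)         # ['setosa' 'versicolor' 'virginica']
print(np.bincount(iris.target))  # [50 50 50]: 50 samples per class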
Visualize the data
We first visualize the data by plotting pairs of features against each other and coloring the points by their target class. This gives us an idea about the separability of the classes.
# Parameters
n_classes = 3
plot_colors = "bry"
plot_step = 0.02
plt.rcParams["figure.figsize"] = [12, 8]
# Load data
iris = load_iris()
for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3],
                                [1, 2], [1, 3], [2, 3]]):
    # We only take the two corresponding features
    X = iris.data[:, pair]
    y = iris.target

    # Shuffle
    idx = np.arange(X.shape[0])
    np.random.seed(13)
    np.random.shuffle(idx)
    X = X[idx]
    y = y[idx]

    # Standardize
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    X = (X - mean) / std

    # Train
    clf = DecisionTreeClassifier(criterion='entropy').fit(X, y)

    # Plot the decision boundary
    plt.subplot(2, 3, pairidx + 1)
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                         np.arange(y_min, y_max, plot_step))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    cs = plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

    plt.xlabel(iris.feature_names[pair[0]])
    plt.ylabel(iris.feature_names[pair[1]])
    plt.axis("tight")

    # Plot the training points
    for i, color in zip(range(n_classes), plot_colors):
        idx = np.where(y == i)
        plt.scatter(X[idx, 0], X[idx, 1], c=color,
                    label=iris.target_names[i])
    plt.legend(loc="upper left")

plt.show()
Train an initial decision tree
We train an initial decision tree classifier on the full Iris dataset without limiting the depth.
# Re-train on the full dataset (all 4 features) with no depth limit
clf = DecisionTreeClassifier(criterion='entropy').fit(iris.data, iris.target)

plt.figure()
tree.plot_tree(clf, feature_names=list(iris.feature_names),
               class_names=list(iris.target_names), filled=True);
plt.show()
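For a text-only view of the same tree (a small sketch using scikit-learn's export_text, not part of the original notebook), you can print the learned split rules directly:

from sklearn.tree import export_text

# Print the tree's split rules as indented text (an alternative to plot_tree)
print(export_text(clf, feature_names=list(iris.feature_names)))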
Compute initial cross-validation score
We compute 5-fold cross-validation scores for the initial unpruned tree. This serves as our baseline model performance.
from sklearn.model_selection import cross_val_score
scores = cross_val_score(clf, iris.data, iris.target, cv=5)
print(scores)
[0.96666667 0.96666667 0.9 0.93333333 1. ]
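To summarize the baseline in a single number (a small addition, not in the original notebook), report the mean score and its spread across folds:

# Summarize the 5 folds as mean +/- standard deviation
print("Baseline: %.3f +/- %.3f" % (scores.mean(), scores.std()))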
Tune max depth parameter
We iterate over different values of max_depth and compute the average cross-validation score for each depth limit. We also plot the scores to visualize the impact of tree depth on performance.
print("Max Depth\tAvg Score")
depths = range(1, 8)
scores = []
for i in depths:
clf = tree.DecisionTreeClassifier(criterion='entropy',max_depth=i)
# Compute cross-val score
score = cross_val_score(clf, iris.data, iris.target, cv=5).mean()
scores.append(score)
# Print scores
print("%d\t\t%.3f" % (i, score))
# Plot scores
plt.figure(figsize=(10,3))
plt.plot(depths, scores)
plt.xlabel("Max Depth")
plt.ylabel("Avg Cross-Val Score")
plt.show()
Max Depth	Avg Score
1		0.667
2		0.933
3		0.960
4		0.953
5		0.953
6		0.953
7		0.960
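If you prefer to pick the depth programmatically rather than by eye (a sketch, not in the original notebook), the smallest depth attaining the maximum score can be read off with NumPy, since np.argmax returns the first occurrence of the maximum:

# Smallest max_depth that attains the highest average cross-val score
best_depth = depths[int(np.argmax(scores))]
print("Best max_depth:", best_depth)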
Select optimal tree depth
Based on the cross-validation results, we select max_depth=2: depth 3 attains the highest average score (0.960), but depth 2 comes close (0.933) with an even simpler, more interpretable tree. We re-train a classifier with this parameter.
X = iris.data
y = iris.target
clf = tree.DecisionTreeClassifier(criterion='entropy', max_depth=2)
clf = clf.fit(X, y)

plt.figure(figsize=(16, 3))
tree.plot_tree(clf, feature_names=list(iris.feature_names),
               class_names=list(iris.target_names), filled=True);
plt.show()
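As a final sanity check (a sketch, not part of the original notebook), you can hold out a test split with the train_test_split imported earlier and confirm that the pruned tree generalizes; the exact accuracy will vary with random_state:

# Hold out 30% of the data and evaluate the pruned tree on it
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=13, stratify=y)
pruned = tree.DecisionTreeClassifier(criterion='entropy', max_depth=2)
pruned.fit(X_train, y_train)
print("Held-out accuracy: %.3f" % pruned.score(X_test, y_test))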