Arboles

El objetivo de este notebook es implementar el algoritmo de arboles. Los arboles puedes ser tuilizados para prediccion y para clasificacion. Como Miguel lo explico, existen diferentes medidas para realizar la seleccion de la variable de un arbol.

Para iniciar vamos a trabajar con datos categoricos:

$H_{(S)} = \sum_{i=1}^{C}{-p_i \log_2{p_i}}$

$H_{(T,X)} = \sum_{c \in{X}}{p_{(c)}{H_{(c)}} }$

$Gain_{(T,X)} = H_{(T)} - H_{(T,X)}$

Algunos links the referencia.

In [13]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import networkx as nx
import matplotlib
from sklearn import datasets
from networkx.drawing.nx_agraph import graphviz_layout
%matplotlib inline
In [2]:
iris = datasets.load_iris()
In [3]:
df_x = pd.DataFrame(iris['data'], columns=iris.feature_names )
df_x.head(5)
Out[3]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)
0 5.1 3.5 1.4 0.2
1 4.9 3.0 1.4 0.2
2 4.7 3.2 1.3 0.2
3 4.6 3.1 1.5 0.2
4 5.0 3.6 1.4 0.2
In [41]:
df_trans = df_x.apply(lambda x: pd.qcut(x,6, False),axis=0)
y = iris['target']
df_trans.head(5)
Out[41]:
sepal length (cm) sepal width (cm) petal length (cm) petal width (cm)
0 1 5 0 0
1 0 2 0 0
2 0 3 0 0
3 0 3 0 0
4 0 5 0 0
In [87]:
class Tree():
        
    def __init__(self, stop=4, verbose=False, col_names=None):
        self.G = nx.DiGraph()
        self.verbose = verbose
        self.col_names = col_names
        self.stop = stop
        
    def train(self,X,y):
        def create_mask(r_ix_aux, r_ix):
            t = 0
            for ix,ix_val in enumerate(r_ix_aux):
                if ix_val:
                    r_ix_aux[ix] = r_ix[t]
                    t += 1
                else:
                    r_ix_aux[ix] = False
            return r_ix_aux
        
        X_aux = X.copy()
        y_aux = y.copy()
        w_queue = []
        c_ix,h_val,g_val = self.calculate_gain(X,y)
        node_prev = self.get_name(c_ix)
        self.G.add_node(node_prev)
        uniques, counts = np.unique(X_aux[:,c_ix], return_counts=True)
        r_ix = [True]*X.shape[0]
        w_queue.extend([(c_ix,r_ix.copy(),ele,node_prev,) for ele in uniques])
        while len(w_queue)>0:
            c_ix, r_ix, ele, node_prev = w_queue.pop(0)
            r_ix_aux = r_ix
            r_ix = np.array(np.equal(X[r_ix,c_ix],[ele]))
            r_ix = create_mask(r_ix_aux, r_ix)
            c_ix,h_val,g_val = self.calculate_gain(X_aux[r_ix,:]
                                           ,y_aux[r_ix])
            print('control', c_ix,g_val,sum(g_val),len(w_queue) )
            if sum(h_val) > 0 and sum(g_val) > 0 :
                node_cur = self.get_name(c_ix)
                self.G.add_node(node_cur)
                self.G.add_edge(node_prev,
                                node_cur,
                                attr_dict={"name":ele,"col":c_ix})
                uniques, counts = np.unique(X_aux[r_ix,c_ix], return_counts=True)
                w_queue.extend([(c_ix,r_ix.copy(),ele,node_cur) for ele in uniques])

            if sum(h_val) == 0.0 :
                uniques, counts = np.unique(y_aux[r_ix],
                                            return_counts=True)
                for unique,count in zip(uniques,counts):
                    final_node = node_prev+"-"+str(unique)
                    self.G.add_node(final_node)
                    self.G.add_edge(node_prev,
                                    final_node,
                                    attr_dict={"name":ele,"col":count})
        
    def calculate_gain(self, X,y):
        """Calcula la ganancia"""
        ini_h = self.entropy(y)
        if len(X.shape)==1:
            X = X.reshape(-1,1)
        n = X.shape[1]
        t_gain = []
        t_entropy = []
        for ix in range(n):
            h_Xy = self.entropy_2v(X[:,ix],y)
            t_gain.append(ini_h-h_Xy)
            t_entropy.append(h_Xy)
        ix_sel = np.argmax(t_gain)
        return ix_sel, t_entropy, t_gain
        
    def entropy(self, y):
        n = np.array(y).shape[0]
        unique, counts = np.unique(y, return_counts=True)
        f_plog = lambda x : -x*math.log(x,2)
        h = sum( [f_plog(c*1.0/n) for c in counts])
        if self.verbose:
            msj = "Num {0} Counts{1} Entropy {2:.02f}"
            print (msj.format(unique, counts, h))   
        return h
    
    def entropy_2v(self,x,y):
        n = np.array(x).shape[0]
        uniques, counts = np.unique(x, return_counts=True)
        h_st = 0
        for unique, counts in zip(uniques, counts):
            prob_c = counts*1.0/n
            ix = np.array(np.equal(x, [unique]))
            h_st += prob_c * self.entropy(y[ix])
            if self.verbose:
                msj = "Prob {0:.02f} Entropy {0:.02f}"
                print (msj.format(prob_c, self.entropy(y[ix])) )
        return h_st
    
    def predict():
        pass

    def get_name(self,ix):
            return self.col_names[ix] if self.col_names is not None else str(ix)      
        
In [88]:
m_tree = Tree(verbose=True, col_names=df_trans.columns.values)
m_tree.train(df_trans.values, y)
Num [0 1 2] Counts[50 50 50] Entropy 1.58
Num [0 1 2] Counts[28  3  1] Entropy 0.64
Num [0 1 2] Counts[28  3  1] Entropy 0.64
Prob 0.21 Entropy 0.21
Num [0 1] Counts[17  3] Entropy 0.61
Num [0 1] Counts[17  3] Entropy 0.61
Prob 0.13 Entropy 0.13
Num [0 1 2] Counts[ 5 18  5] Entropy 1.30
Num [0 1 2] Counts[ 5 18  5] Entropy 1.30
Prob 0.19 Entropy 0.19
Num [1 2] Counts[15 13] Entropy 1.00
Num [1 2] Counts[15 13] Entropy 1.00
Prob 0.19 Entropy 0.19
Num [1 2] Counts[ 8 14] Entropy 0.95
Num [1 2] Counts[ 8 14] Entropy 0.95
Prob 0.15 Entropy 0.15
Num [1 2] Counts[ 3 17] Entropy 0.61
Num [1 2] Counts[ 3 17] Entropy 0.61
Prob 0.13 Entropy 0.13
Num [0 1 2] Counts[ 1 21 11] Entropy 1.10
Num [0 1 2] Counts[ 1 21 11] Entropy 1.10
Prob 0.22 Entropy 0.22
Num [0 1 2] Counts[ 1 13 10] Entropy 1.20
Num [0 1 2] Counts[ 1 13 10] Entropy 1.20
Prob 0.16 Entropy 0.16
Num [0 1 2] Counts[ 6  8 12] Entropy 1.53
Num [0 1 2] Counts[ 6  8 12] Entropy 1.53
Prob 0.17 Entropy 0.17
Num [0 1 2] Counts[10  6  9] Entropy 1.55
Num [0 1 2] Counts[10  6  9] Entropy 1.55
Prob 0.17 Entropy 0.17
Num [0 1 2] Counts[11  2  5] Entropy 1.30
Num [0 1 2] Counts[11  2  5] Entropy 1.30
Prob 0.12 Entropy 0.12
Num [0 2] Counts[21  3] Entropy 0.54
Num [0 2] Counts[21  3] Entropy 0.54
Prob 0.16 Entropy 0.16
Num [0] Counts[37] Entropy 0.00
Num [0] Counts[37] Entropy 0.00
Prob 0.25 Entropy 0.25
Num [0] Counts[13] Entropy 0.00
Num [0] Counts[13] Entropy 0.00
Prob 0.09 Entropy 0.09
Num [1] Counts[25] Entropy 0.00
Num [1] Counts[25] Entropy 0.00
Prob 0.17 Entropy 0.17
Num [1 2] Counts[23  6] Entropy 0.74
Num [1 2] Counts[23  6] Entropy 0.74
Prob 0.19 Entropy 0.19
Num [1 2] Counts[ 2 19] Entropy 0.45
Num [1 2] Counts[ 2 19] Entropy 0.45
Prob 0.14 Entropy 0.14
Num [2] Counts[25] Entropy 0.00
Num [2] Counts[25] Entropy 0.00
Prob 0.17 Entropy 0.17
Num [0] Counts[34] Entropy 0.00
Num [0] Counts[34] Entropy 0.00
Prob 0.23 Entropy 0.23
Num [0] Counts[16] Entropy 0.00
Num [0] Counts[16] Entropy 0.00
Prob 0.11 Entropy 0.11
Num [1] Counts[28] Entropy 0.00
Num [1] Counts[28] Entropy 0.00
Prob 0.19 Entropy 0.19
Num [1 2] Counts[20  4] Entropy 0.65
Num [1 2] Counts[20  4] Entropy 0.65
Prob 0.16 Entropy 0.16
Num [1 2] Counts[ 2 23] Entropy 0.40
Num [1 2] Counts[ 2 23] Entropy 0.40
Prob 0.17 Entropy 0.17
Num [2] Counts[23] Entropy 0.00
Num [2] Counts[23] Entropy 0.00
Prob 0.15 Entropy 0.15
Num [0] Counts[34] Entropy 0.00
Num [0] Counts[22] Entropy 0.00
Num [0] Counts[22] Entropy 0.00
Prob 0.65 Entropy 0.65
Num [0] Counts[9] Entropy 0.00
Num [0] Counts[9] Entropy 0.00
Prob 0.26 Entropy 0.26
Num [0] Counts[3] Entropy 0.00
Num [0] Counts[3] Entropy 0.00
Prob 0.09 Entropy 0.09
Num [0] Counts[1] Entropy 0.00
Num [0] Counts[1] Entropy 0.00
Prob 0.03 Entropy 0.03
Num [0] Counts[5] Entropy 0.00
Num [0] Counts[5] Entropy 0.00
Prob 0.15 Entropy 0.15
Num [0] Counts[10] Entropy 0.00
Num [0] Counts[10] Entropy 0.00
Prob 0.29 Entropy 0.29
Num [0] Counts[7] Entropy 0.00
Num [0] Counts[7] Entropy 0.00
Prob 0.21 Entropy 0.21
Num [0] Counts[11] Entropy 0.00
Num [0] Counts[11] Entropy 0.00
Prob 0.32 Entropy 0.32
Num [0] Counts[27] Entropy 0.00
Num [0] Counts[27] Entropy 0.00
Prob 0.79 Entropy 0.79
Num [0] Counts[7] Entropy 0.00
Num [0] Counts[7] Entropy 0.00
Prob 0.21 Entropy 0.21
Num [0] Counts[34] Entropy 0.00
Num [0] Counts[34] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 5
Num [0] Counts[16] Entropy 0.00
Num [0] Counts[6] Entropy 0.00
Num [0] Counts[6] Entropy 0.00
Prob 0.38 Entropy 0.38
Num [0] Counts[8] Entropy 0.00
Num [0] Counts[8] Entropy 0.00
Prob 0.50 Entropy 0.50
Num [0] Counts[2] Entropy 0.00
Num [0] Counts[2] Entropy 0.00
Prob 0.12 Entropy 0.12
Num [0] Counts[1] Entropy 0.00
Num [0] Counts[1] Entropy 0.00
Prob 0.06 Entropy 0.06
Num [0] Counts[1] Entropy 0.00
Num [0] Counts[1] Entropy 0.00
Prob 0.06 Entropy 0.06
Num [0] Counts[4] Entropy 0.00
Num [0] Counts[4] Entropy 0.00
Prob 0.25 Entropy 0.25
Num [0] Counts[10] Entropy 0.00
Num [0] Counts[10] Entropy 0.00
Prob 0.62 Entropy 0.62
Num [0] Counts[10] Entropy 0.00
Num [0] Counts[10] Entropy 0.00
Prob 0.62 Entropy 0.62
Num [0] Counts[6] Entropy 0.00
Num [0] Counts[6] Entropy 0.00
Prob 0.38 Entropy 0.38
Num [0] Counts[16] Entropy 0.00
Num [0] Counts[16] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 4
Num [1] Counts[28] Entropy 0.00
Num [1] Counts[3] Entropy 0.00
Num [1] Counts[3] Entropy 0.00
Prob 0.11 Entropy 0.11
Num [1] Counts[1] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Prob 0.04 Entropy 0.04
Num [1] Counts[17] Entropy 0.00
Num [1] Counts[17] Entropy 0.00
Prob 0.61 Entropy 0.61
Num [1] Counts[5] Entropy 0.00
Num [1] Counts[5] Entropy 0.00
Prob 0.18 Entropy 0.18
Num [1] Counts[2] Entropy 0.00
Num [1] Counts[2] Entropy 0.00
Prob 0.07 Entropy 0.07
Num [1] Counts[17] Entropy 0.00
Num [1] Counts[17] Entropy 0.00
Prob 0.61 Entropy 0.61
Num [1] Counts[9] Entropy 0.00
Num [1] Counts[9] Entropy 0.00
Prob 0.32 Entropy 0.32
Num [1] Counts[2] Entropy 0.00
Num [1] Counts[2] Entropy 0.00
Prob 0.07 Entropy 0.07
Num [1] Counts[23] Entropy 0.00
Num [1] Counts[23] Entropy 0.00
Prob 0.82 Entropy 0.82
Num [1] Counts[5] Entropy 0.00
Num [1] Counts[5] Entropy 0.00
Prob 0.18 Entropy 0.18
Num [1] Counts[28] Entropy 0.00
Num [1] Counts[28] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 3
Num [1 2] Counts[20  4] Entropy 0.65
Num [1] Counts[2] Entropy 0.00
Num [1] Counts[2] Entropy 0.00
Prob 0.08 Entropy 0.08
Num [1] Counts[1] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Prob 0.04 Entropy 0.04
Num [1 2] Counts[9 3] Entropy 0.81
Num [1 2] Counts[9 3] Entropy 0.81
Prob 0.50 Entropy 0.50
Num [1] Counts[5] Entropy 0.00
Num [1] Counts[5] Entropy 0.00
Prob 0.21 Entropy 0.21
Num [1 2] Counts[3 1] Entropy 0.81
Num [1 2] Counts[3 1] Entropy 0.81
Prob 0.17 Entropy 0.17
Num [1 2] Counts[4 2] Entropy 0.92
Num [1 2] Counts[4 2] Entropy 0.92
Prob 0.25 Entropy 0.25
Num [1 2] Counts[4 1] Entropy 0.72
Num [1 2] Counts[4 1] Entropy 0.72
Prob 0.21 Entropy 0.21
Num [1 2] Counts[5 1] Entropy 0.65
Num [1 2] Counts[5 1] Entropy 0.65
Prob 0.25 Entropy 0.25
Num [1] Counts[5] Entropy 0.00
Num [1] Counts[5] Entropy 0.00
Prob 0.21 Entropy 0.21
Num [1] Counts[2] Entropy 0.00
Num [1] Counts[2] Entropy 0.00
Prob 0.08 Entropy 0.08
Num [1] Counts[2] Entropy 0.00
Num [1] Counts[2] Entropy 0.00
Prob 0.08 Entropy 0.08
Num [1] Counts[17] Entropy 0.00
Num [1] Counts[17] Entropy 0.00
Prob 0.71 Entropy 0.71
Num [1 2] Counts[1 2] Entropy 0.92
Num [1 2] Counts[1 2] Entropy 0.92
Prob 0.12 Entropy 0.12
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 0.08 Entropy 0.08
Num [1 2] Counts[20  4] Entropy 0.65
Num [1 2] Counts[20  4] Entropy 0.65
Prob 1.00 Entropy 1.00
control 2 [0.10917033867559889, 0.10754117128777607, 0.5352354423915429, 0.0] 0.7519469523549178 2
Num [1 2] Counts[ 2 23] Entropy 0.40
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 0.04 Entropy 0.04
Num [2] Counts[4] Entropy 0.00
Num [2] Counts[4] Entropy 0.00
Prob 0.16 Entropy 0.16
Num [1 2] Counts[1 7] Entropy 0.54
Num [1 2] Counts[1 7] Entropy 0.54
Prob 0.32 Entropy 0.32
Num [1 2] Counts[1 6] Entropy 0.59
Num [1 2] Counts[1 6] Entropy 0.59
Prob 0.28 Entropy 0.28
Num [2] Counts[5] Entropy 0.00
Num [2] Counts[5] Entropy 0.00
Prob 0.20 Entropy 0.20
Num [2] Counts[8] Entropy 0.00
Num [2] Counts[8] Entropy 0.00
Prob 0.32 Entropy 0.32
Num [2] Counts[6] Entropy 0.00
Num [2] Counts[6] Entropy 0.00
Prob 0.24 Entropy 0.24
Num [1 2] Counts[1 5] Entropy 0.65
Num [1 2] Counts[1 5] Entropy 0.65
Prob 0.24 Entropy 0.24
Num [1 2] Counts[1 3] Entropy 0.81
Num [1 2] Counts[1 3] Entropy 0.81
Prob 0.16 Entropy 0.16
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 0.04 Entropy 0.04
Num [1 2] Counts[1 6] Entropy 0.59
Num [1 2] Counts[1 6] Entropy 0.59
Prob 0.28 Entropy 0.28
Num [1 2] Counts[ 1 10] Entropy 0.44
Num [1 2] Counts[ 1 10] Entropy 0.44
Prob 0.44 Entropy 0.44
Num [2] Counts[7] Entropy 0.00
Num [2] Counts[7] Entropy 0.00
Prob 0.28 Entropy 0.28
Num [1 2] Counts[ 2 23] Entropy 0.40
Num [1 2] Counts[ 2 23] Entropy 0.40
Prob 1.00 Entropy 1.00
control 1 [0.06257019037535033, 0.11636930909320664, 0.043132137953755256, 0.0] 0.22207163742231223 5
Num [2] Counts[23] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 0.04 Entropy 0.04
Num [2] Counts[3] Entropy 0.00
Num [2] Counts[3] Entropy 0.00
Prob 0.13 Entropy 0.13
Num [2] Counts[8] Entropy 0.00
Num [2] Counts[8] Entropy 0.00
Prob 0.35 Entropy 0.35
Num [2] Counts[11] Entropy 0.00
Num [2] Counts[11] Entropy 0.00
Prob 0.48 Entropy 0.48
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 0.04 Entropy 0.04
Num [2] Counts[3] Entropy 0.00
Num [2] Counts[3] Entropy 0.00
Prob 0.13 Entropy 0.13
Num [2] Counts[6] Entropy 0.00
Num [2] Counts[6] Entropy 0.00
Prob 0.26 Entropy 0.26
Num [2] Counts[6] Entropy 0.00
Num [2] Counts[6] Entropy 0.00
Prob 0.26 Entropy 0.26
Num [2] Counts[5] Entropy 0.00
Num [2] Counts[5] Entropy 0.00
Prob 0.22 Entropy 0.22
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 0.09 Entropy 0.09
Num [2] Counts[7] Entropy 0.00
Num [2] Counts[7] Entropy 0.00
Prob 0.30 Entropy 0.30
Num [2] Counts[16] Entropy 0.00
Num [2] Counts[16] Entropy 0.00
Prob 0.70 Entropy 0.70
Num [2] Counts[23] Entropy 0.00
Num [2] Counts[23] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 9
Num [1] Counts[2] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Prob 0.50 Entropy 0.50
Num [1] Counts[1] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Prob 0.50 Entropy 0.50
Num [1] Counts[1] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Prob 0.50 Entropy 0.50
Num [1] Counts[1] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Prob 0.50 Entropy 0.50
Num [1] Counts[2] Entropy 0.00
Num [1] Counts[2] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [1] Counts[2] Entropy 0.00
Num [1] Counts[2] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 8
Num [1] Counts[17] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Prob 0.06 Entropy 0.06
Num [1] Counts[1] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Prob 0.06 Entropy 0.06
Num [1] Counts[7] Entropy 0.00
Num [1] Counts[7] Entropy 0.00
Prob 0.41 Entropy 0.41
Num [1] Counts[5] Entropy 0.00
Num [1] Counts[5] Entropy 0.00
Prob 0.29 Entropy 0.29
Num [1] Counts[3] Entropy 0.00
Num [1] Counts[3] Entropy 0.00
Prob 0.18 Entropy 0.18
Num [1] Counts[2] Entropy 0.00
Num [1] Counts[2] Entropy 0.00
Prob 0.12 Entropy 0.12
Num [1] Counts[4] Entropy 0.00
Num [1] Counts[4] Entropy 0.00
Prob 0.24 Entropy 0.24
Num [1] Counts[4] Entropy 0.00
Num [1] Counts[4] Entropy 0.00
Prob 0.24 Entropy 0.24
Num [1] Counts[5] Entropy 0.00
Num [1] Counts[5] Entropy 0.00
Prob 0.29 Entropy 0.29
Num [1] Counts[2] Entropy 0.00
Num [1] Counts[2] Entropy 0.00
Prob 0.12 Entropy 0.12
Num [1] Counts[17] Entropy 0.00
Num [1] Counts[17] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [1] Counts[17] Entropy 0.00
Num [1] Counts[17] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 7
Num [1 2] Counts[1 2] Entropy 0.92
Num [1 2] Counts[1 2] Entropy 0.92
Num [1 2] Counts[1 2] Entropy 0.92
Prob 1.00 Entropy 1.00
Num [1 2] Counts[1 1] Entropy 1.00
Num [1 2] Counts[1 1] Entropy 1.00
Prob 0.67 Entropy 0.67
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 0.33 Entropy 0.33
Num [1 2] Counts[1 2] Entropy 0.92
Num [1 2] Counts[1 2] Entropy 0.92
Prob 1.00 Entropy 1.00
Num [1 2] Counts[1 2] Entropy 0.92
Num [1 2] Counts[1 2] Entropy 0.92
Prob 1.00 Entropy 1.00
control 1 [0.0, 0.2516291673878229, 0.0, 0.0] 0.2516291673878229 6
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 0.50 Entropy 0.50
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 0.50 Entropy 0.50
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 0.50 Entropy 0.50
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 0.50 Entropy 0.50
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 7
Num [2] Counts[8] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 0.12 Entropy 0.12
Num [2] Counts[3] Entropy 0.00
Num [2] Counts[3] Entropy 0.00
Prob 0.38 Entropy 0.38
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 0.25 Entropy 0.25
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 0.25 Entropy 0.25
Num [2] Counts[8] Entropy 0.00
Num [2] Counts[8] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 0.25 Entropy 0.25
Num [2] Counts[5] Entropy 0.00
Num [2] Counts[5] Entropy 0.00
Prob 0.62 Entropy 0.62
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 0.12 Entropy 0.12
Num [2] Counts[8] Entropy 0.00
Num [2] Counts[8] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 6
Num [2] Counts[6] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 0.17 Entropy 0.17
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 0.33 Entropy 0.33
Num [2] Counts[3] Entropy 0.00
Num [2] Counts[3] Entropy 0.00
Prob 0.50 Entropy 0.50
Num [2] Counts[6] Entropy 0.00
Num [2] Counts[6] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 0.33 Entropy 0.33
Num [2] Counts[4] Entropy 0.00
Num [2] Counts[4] Entropy 0.00
Prob 0.67 Entropy 0.67
Num [2] Counts[6] Entropy 0.00
Num [2] Counts[6] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 5
Num [1 2] Counts[1 5] Entropy 0.65
Num [2] Counts[3] Entropy 0.00
Num [2] Counts[3] Entropy 0.00
Prob 0.50 Entropy 0.50
Num [1 2] Counts[1 2] Entropy 0.92
Num [1 2] Counts[1 2] Entropy 0.92
Prob 0.50 Entropy 0.50
Num [1 2] Counts[1 5] Entropy 0.65
Num [1 2] Counts[1 5] Entropy 0.65
Prob 1.00 Entropy 1.00
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 0.33 Entropy 0.33
Num [1 2] Counts[1 3] Entropy 0.81
Num [1 2] Counts[1 3] Entropy 0.81
Prob 0.67 Entropy 0.67
Num [1 2] Counts[1 5] Entropy 0.65
Num [1 2] Counts[1 5] Entropy 0.65
Prob 1.00 Entropy 1.00
control 0 [0.19087450462110933, 0.0, 0.10917033867559889, 0.0] 0.3000448432967082 4
Num [1 2] Counts[1 3] Entropy 0.81
Num [1] Counts[1] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Prob 0.25 Entropy 0.25
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 0.50 Entropy 0.50
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 0.25 Entropy 0.25
Num [1 2] Counts[1 3] Entropy 0.81
Num [1 2] Counts[1 3] Entropy 0.81
Prob 1.00 Entropy 1.00
Num [1] Counts[1] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Prob 0.25 Entropy 0.25
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 0.50 Entropy 0.50
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 0.25 Entropy 0.25
Num [1 2] Counts[1 3] Entropy 0.81
Num [1 2] Counts[1 3] Entropy 0.81
Prob 1.00 Entropy 1.00
control 0 [0.8112781244591328, 0.0, 0.8112781244591328, 0.0] 1.6225562489182657 5
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 7
Num [1 2] Counts[1 1] Entropy 1.00
Num [1 2] Counts[1 1] Entropy 1.00
Num [1 2] Counts[1 1] Entropy 1.00
Prob 1.00 Entropy 1.00
Num [1 2] Counts[1 1] Entropy 1.00
Num [1 2] Counts[1 1] Entropy 1.00
Prob 1.00 Entropy 1.00
Num [1 2] Counts[1 1] Entropy 1.00
Num [1 2] Counts[1 1] Entropy 1.00
Prob 1.00 Entropy 1.00
Num [1 2] Counts[1 1] Entropy 1.00
Num [1 2] Counts[1 1] Entropy 1.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 6
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 5
Num [2] Counts[3] Entropy 0.00
Num [2] Counts[3] Entropy 0.00
Num [2] Counts[3] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[3] Entropy 0.00
Num [2] Counts[3] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 0.67 Entropy 0.67
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 0.33 Entropy 0.33
Num [2] Counts[3] Entropy 0.00
Num [2] Counts[3] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 4
Num [1 2] Counts[1 2] Entropy 0.92
Num [1 2] Counts[1 2] Entropy 0.92
Num [1 2] Counts[1 2] Entropy 0.92
Prob 1.00 Entropy 1.00
Num [1 2] Counts[1 2] Entropy 0.92
Num [1 2] Counts[1 2] Entropy 0.92
Prob 1.00 Entropy 1.00
Num [1 2] Counts[1 2] Entropy 0.92
Num [1 2] Counts[1 2] Entropy 0.92
Prob 1.00 Entropy 1.00
Num [1 2] Counts[1 2] Entropy 0.92
Num [1 2] Counts[1 2] Entropy 0.92
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 3
Num [1] Counts[1] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [1] Counts[1] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [1] Counts[1] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [1] Counts[1] Entropy 0.00
Num [1] Counts[1] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 2
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[2] Entropy 0.00
Num [2] Counts[2] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 1
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 1.00 Entropy 1.00
Num [2] Counts[1] Entropy 0.00
Num [2] Counts[1] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 0
In [89]:
data = [["D1","Sunny","Hot","High","Weak","No"],
["D2","Sunny","Hot","High","Strong","No"],
["D3","Overcast","Hot","High","Weak","Yes"],
["D4","Rain","Mild","High","Weak","Yes"],
["D5","Rain","Cool","Normal","Weak","Yes"],
["D6","Rain","Cool","Normal","Strong","No"],
["D7","Overcast","Cool","Normal","Strong","Yes"],
["D8","Sunny","Mild","High","Weak","No"],
["D9","Sunny","Cool","Normal","Weak","Yes"],
["D10","Rain","Mild","Normal","Weak","Yes"],
["D11","Sunny","Mild","Normal","Strong","Yes"],
["D12","Overcast","Mild","High","Strong","Yes"],
["D13","Overcast","Hot","Normal","Weak","Yes"],
["D14","Rain","Mild","High","Strong","No"]]
columns=["Day","Outlook","Temperature","Humidity","Wind","Play Golf"]
df_golf = pd.DataFrame(data,columns=columns )

m_tree = Tree(verbose=True,col_names=columns[1:5] )
m_tree.train(df_golf.iloc[:,1:5].values,df_golf.iloc[:,-1].values)
#m_tree.entropy_2v(df_golf.iloc[:,4],df_golf.iloc[:,-1].values)
Num ['No' 'Yes'] Counts[5 9] Entropy 0.94
Num ['Yes'] Counts[4] Entropy 0.00
Num ['Yes'] Counts[4] Entropy 0.00
Prob 0.29 Entropy 0.29
Num ['No' 'Yes'] Counts[2 3] Entropy 0.97
Num ['No' 'Yes'] Counts[2 3] Entropy 0.97
Prob 0.36 Entropy 0.36
Num ['No' 'Yes'] Counts[3 2] Entropy 0.97
Num ['No' 'Yes'] Counts[3 2] Entropy 0.97
Prob 0.36 Entropy 0.36
Num ['No' 'Yes'] Counts[1 3] Entropy 0.81
Num ['No' 'Yes'] Counts[1 3] Entropy 0.81
Prob 0.29 Entropy 0.29
Num ['No' 'Yes'] Counts[2 2] Entropy 1.00
Num ['No' 'Yes'] Counts[2 2] Entropy 1.00
Prob 0.29 Entropy 0.29
Num ['No' 'Yes'] Counts[2 4] Entropy 0.92
Num ['No' 'Yes'] Counts[2 4] Entropy 0.92
Prob 0.43 Entropy 0.43
Num ['No' 'Yes'] Counts[4 3] Entropy 0.99
Num ['No' 'Yes'] Counts[4 3] Entropy 0.99
Prob 0.50 Entropy 0.50
Num ['No' 'Yes'] Counts[1 6] Entropy 0.59
Num ['No' 'Yes'] Counts[1 6] Entropy 0.59
Prob 0.50 Entropy 0.50
Num ['No' 'Yes'] Counts[3 3] Entropy 1.00
Num ['No' 'Yes'] Counts[3 3] Entropy 1.00
Prob 0.43 Entropy 0.43
Num ['No' 'Yes'] Counts[2 6] Entropy 0.81
Num ['No' 'Yes'] Counts[2 6] Entropy 0.81
Prob 0.57 Entropy 0.57
Num ['Yes'] Counts[4] Entropy 0.00
Num ['Yes'] Counts[4] Entropy 0.00
Num ['Yes'] Counts[4] Entropy 0.00
Prob 1.00 Entropy 1.00
Num ['Yes'] Counts[1] Entropy 0.00
Num ['Yes'] Counts[1] Entropy 0.00
Prob 0.25 Entropy 0.25
Num ['Yes'] Counts[2] Entropy 0.00
Num ['Yes'] Counts[2] Entropy 0.00
Prob 0.50 Entropy 0.50
Num ['Yes'] Counts[1] Entropy 0.00
Num ['Yes'] Counts[1] Entropy 0.00
Prob 0.25 Entropy 0.25
Num ['Yes'] Counts[2] Entropy 0.00
Num ['Yes'] Counts[2] Entropy 0.00
Prob 0.50 Entropy 0.50
Num ['Yes'] Counts[2] Entropy 0.00
Num ['Yes'] Counts[2] Entropy 0.00
Prob 0.50 Entropy 0.50
Num ['Yes'] Counts[2] Entropy 0.00
Num ['Yes'] Counts[2] Entropy 0.00
Prob 0.50 Entropy 0.50
Num ['Yes'] Counts[2] Entropy 0.00
Num ['Yes'] Counts[2] Entropy 0.00
Prob 0.50 Entropy 0.50
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 2
Num ['No' 'Yes'] Counts[2 3] Entropy 0.97
Num ['No' 'Yes'] Counts[2 3] Entropy 0.97
Num ['No' 'Yes'] Counts[2 3] Entropy 0.97
Prob 1.00 Entropy 1.00
Num ['No' 'Yes'] Counts[1 1] Entropy 1.00
Num ['No' 'Yes'] Counts[1 1] Entropy 1.00
Prob 0.40 Entropy 0.40
Num ['No' 'Yes'] Counts[1 2] Entropy 0.92
Num ['No' 'Yes'] Counts[1 2] Entropy 0.92
Prob 0.60 Entropy 0.60
Num ['No' 'Yes'] Counts[1 1] Entropy 1.00
Num ['No' 'Yes'] Counts[1 1] Entropy 1.00
Prob 0.40 Entropy 0.40
Num ['No' 'Yes'] Counts[1 2] Entropy 0.92
Num ['No' 'Yes'] Counts[1 2] Entropy 0.92
Prob 0.60 Entropy 0.60
Num ['No'] Counts[2] Entropy 0.00
Num ['No'] Counts[2] Entropy 0.00
Prob 0.40 Entropy 0.40
Num ['Yes'] Counts[3] Entropy 0.00
Num ['Yes'] Counts[3] Entropy 0.00
Prob 0.60 Entropy 0.60
control 3 [0.0, 0.01997309402197489, 0.01997309402197489, 0.9709505944546686] 1.0108967824986184 1
Num ['No' 'Yes'] Counts[3 2] Entropy 0.97
Num ['No' 'Yes'] Counts[3 2] Entropy 0.97
Num ['No' 'Yes'] Counts[3 2] Entropy 0.97
Prob 1.00 Entropy 1.00
Num ['Yes'] Counts[1] Entropy 0.00
Num ['Yes'] Counts[1] Entropy 0.00
Prob 0.20 Entropy 0.20
Num ['No'] Counts[2] Entropy 0.00
Num ['No'] Counts[2] Entropy 0.00
Prob 0.40 Entropy 0.40
Num ['No' 'Yes'] Counts[1 1] Entropy 1.00
Num ['No' 'Yes'] Counts[1 1] Entropy 1.00
Prob 0.40 Entropy 0.40
Num ['No'] Counts[3] Entropy 0.00
Num ['No'] Counts[3] Entropy 0.00
Prob 0.60 Entropy 0.60
Num ['Yes'] Counts[2] Entropy 0.00
Num ['Yes'] Counts[2] Entropy 0.00
Prob 0.40 Entropy 0.40
Num ['No' 'Yes'] Counts[1 1] Entropy 1.00
Num ['No' 'Yes'] Counts[1 1] Entropy 1.00
Prob 0.40 Entropy 0.40
Num ['No' 'Yes'] Counts[2 1] Entropy 0.92
Num ['No' 'Yes'] Counts[2 1] Entropy 0.92
Prob 0.60 Entropy 0.60
control 2 [0.0, 0.5709505944546686, 0.9709505944546686, 0.01997309402197489] 1.5618742829313121 2
Num ['No'] Counts[2] Entropy 0.00
Num ['No'] Counts[2] Entropy 0.00
Num ['No'] Counts[2] Entropy 0.00
Prob 1.00 Entropy 1.00
Num ['No'] Counts[1] Entropy 0.00
Num ['No'] Counts[1] Entropy 0.00
Prob 0.50 Entropy 0.50
Num ['No'] Counts[1] Entropy 0.00
Num ['No'] Counts[1] Entropy 0.00
Prob 0.50 Entropy 0.50
Num ['No'] Counts[1] Entropy 0.00
Num ['No'] Counts[1] Entropy 0.00
Prob 0.50 Entropy 0.50
Num ['No'] Counts[1] Entropy 0.00
Num ['No'] Counts[1] Entropy 0.00
Prob 0.50 Entropy 0.50
Num ['No'] Counts[2] Entropy 0.00
Num ['No'] Counts[2] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 3
Num ['Yes'] Counts[3] Entropy 0.00
Num ['Yes'] Counts[3] Entropy 0.00
Num ['Yes'] Counts[3] Entropy 0.00
Prob 1.00 Entropy 1.00
Num ['Yes'] Counts[1] Entropy 0.00
Num ['Yes'] Counts[1] Entropy 0.00
Prob 0.33 Entropy 0.33
Num ['Yes'] Counts[2] Entropy 0.00
Num ['Yes'] Counts[2] Entropy 0.00
Prob 0.67 Entropy 0.67
Num ['Yes'] Counts[1] Entropy 0.00
Num ['Yes'] Counts[1] Entropy 0.00
Prob 0.33 Entropy 0.33
Num ['Yes'] Counts[2] Entropy 0.00
Num ['Yes'] Counts[2] Entropy 0.00
Prob 0.67 Entropy 0.67
Num ['Yes'] Counts[3] Entropy 0.00
Num ['Yes'] Counts[3] Entropy 0.00
Prob 1.00 Entropy 1.00
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 2
Num ['No'] Counts[3] Entropy 0.00
Num ['No'] Counts[3] Entropy 0.00
Num ['No'] Counts[3] Entropy 0.00
Prob 1.00 Entropy 1.00
Num ['No'] Counts[2] Entropy 0.00
Num ['No'] Counts[2] Entropy 0.00
Prob 0.67 Entropy 0.67
Num ['No'] Counts[1] Entropy 0.00
Num ['No'] Counts[1] Entropy 0.00
Prob 0.33 Entropy 0.33
Num ['No'] Counts[3] Entropy 0.00
Num ['No'] Counts[3] Entropy 0.00
Prob 1.00 Entropy 1.00
Num ['No'] Counts[1] Entropy 0.00
Num ['No'] Counts[1] Entropy 0.00
Prob 0.33 Entropy 0.33
Num ['No'] Counts[2] Entropy 0.00
Num ['No'] Counts[2] Entropy 0.00
Prob 0.67 Entropy 0.67
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 1
Num ['Yes'] Counts[2] Entropy 0.00
Num ['Yes'] Counts[2] Entropy 0.00
Num ['Yes'] Counts[2] Entropy 0.00
Prob 1.00 Entropy 1.00
Num ['Yes'] Counts[1] Entropy 0.00
Num ['Yes'] Counts[1] Entropy 0.00
Prob 0.50 Entropy 0.50
Num ['Yes'] Counts[1] Entropy 0.00
Num ['Yes'] Counts[1] Entropy 0.00
Prob 0.50 Entropy 0.50
Num ['Yes'] Counts[2] Entropy 0.00
Num ['Yes'] Counts[2] Entropy 0.00
Prob 1.00 Entropy 1.00
Num ['Yes'] Counts[1] Entropy 0.00
Num ['Yes'] Counts[1] Entropy 0.00
Prob 0.50 Entropy 0.50
Num ['Yes'] Counts[1] Entropy 0.00
Num ['Yes'] Counts[1] Entropy 0.00
Prob 0.50 Entropy 0.50
control 0 [0.0, 0.0, 0.0, 0.0] 0.0 0
In [90]:
edges_n = {(u,v):m_tree.G.get_edge_data(u,v)['attr_dict']['name']
           for u,v in m_tree.G.edges()}
edges_n
Out[90]:
{('Outlook', 'Outlook-Yes'): 'Overcast',
 ('Outlook', 'Wind'): 'Rain',
 ('Outlook', 'Humidity'): 'Sunny',
 ('Wind', 'Wind-No'): 'Strong',
 ('Wind', 'Wind-Yes'): 'Weak',
 ('Humidity', 'Humidity-No'): 'High',
 ('Humidity', 'Humidity-Yes'): 'Normal'}
In [91]:
plt.title('Tree Model')
pos = graphviz_layout(m_tree.G, prog='dot')
edges_n = {(u,v):m_tree.G.get_edge_data(u,v)['attr_dict']['name']
           for u,v in m_tree.G.edges()}
nx.draw_networkx_edge_labels(m_tree.G,pos,edge_labels=edges_n)
nx.draw(m_tree.G, pos, with_labels=True, arrows=True)
plt.show()
In [30]:
import graphviz
from sklearn.datasets import load_iris
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree 
clf = tree.DecisionTreeClassifier(random_state=0,criterion='entropy')
clf.fit(df_trans, y)
Out[30]:
DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=0,
            splitter='best')
In [31]:
dot_data = tree.export_graphviz(clf, out_file=None, 
                         feature_names=iris.feature_names,  
                         class_names=iris.target_names,  
                         filled=True, rounded=True,  
                         special_characters=True)  
graph = graphviz.Source(dot_data)  
graph 
Out[31]:
Tree 0 petal width (cm) ≤ 1.5 entropy = 1.585 samples = 150 value = [50, 50, 50] class = setosa 1 entropy = 0.0 samples = 50 value = [50, 0, 0] class = setosa 0->1 True 2 petal width (cm) ≤ 3.5 entropy = 1.0 samples = 100 value = [0, 50, 50] class = versicolor 0->2 False 3 petal length (cm) ≤ 3.5 entropy = 0.391 samples = 52 value = [0, 48, 4] class = versicolor 2->3 10 petal width (cm) ≤ 4.5 entropy = 0.25 samples = 48 value = [0, 2, 46] class = virginica 2->10 4 entropy = 0.0 samples = 47 value = [0, 47, 0] class = versicolor 3->4 5 petal length (cm) ≤ 4.5 entropy = 0.722 samples = 5 value = [0, 1, 4] class = virginica 3->5 6 sepal width (cm) ≤ 0.5 entropy = 0.918 samples = 3 value = [0, 1, 2] class = virginica 5->6 9 entropy = 0.0 samples = 2 value = [0, 0, 2] class = virginica 5->9 7 entropy = 1.0 samples = 2 value = [0, 1, 1] class = versicolor 6->7 8 entropy = 0.0 samples = 1 value = [0, 0, 1] class = virginica 6->8 11 sepal width (cm) ≤ 1.5 entropy = 0.402 samples = 25 value = [0, 2, 23] class = virginica 10->11 22 entropy = 0.0 samples = 23 value = [0, 0, 23] class = virginica 10->22 12 entropy = 0.0 samples = 14 value = [0, 0, 14] class = virginica 11->12 13 petal length (cm) ≤ 4.5 entropy = 0.684 samples = 11 value = [0, 2, 9] class = virginica 11->13 14 sepal width (cm) ≤ 2.5 entropy = 0.764 samples = 9 value = [0, 2, 7] class = virginica 13->14 21 entropy = 0.0 samples = 2 value = [0, 0, 2] class = virginica 13->21 15 sepal length (cm) ≤ 3.5 entropy = 0.65 samples = 6 value = [0, 1, 5] class = virginica 14->15 18 sepal length (cm) ≤ 3.5 entropy = 0.918 samples = 3 value = [0, 1, 2] class = virginica 14->18 16 entropy = 0.0 samples = 3 value = [0, 0, 3] class = virginica 15->16 17 entropy = 0.918 samples = 3 value = [0, 1, 2] class = virginica 15->17 19 entropy = 0.0 samples = 1 value = [0, 1, 0] class = versicolor 18->19 20 entropy = 0.0 samples = 2 value = [0, 0, 2] class = virginica 18->20