import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, StratifiedKFold, LeaveOneOut
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
import pydotplus
from IPython.display import Image


from sklearn.tree import DecisionTreeClassifier
clf = DecisionTreeClassifier()


# Excel workbook to read; each name in `individuos` is a sheet in it
# (presumably one sheet per recorded subject -- confirm against the file).
fichero = './datosActividades.xlsx'
individuos = ['Hombre1', 'Hombre2', 'Mujer1']

# Stack all sheets into one frame. DataFrame.append was removed in pandas 2.0,
# so pd.concat is used instead; ignore_index=True yields a fresh 0..n-1 index
# (the original reset_index(drop=True) call discarded its result and had no
# effect).
data = pd.concat(
    [pd.read_excel(fichero, sheet_name=persona) for persona in individuos],
    ignore_index=True,
)

# NOTE(review): the scaler is instantiated but never fitted or applied in the
# visible part of the script.
scaler = MinMaxScaler()

# Column 0 is used as the class label, every remaining column as a feature.
X = data.iloc[:, 1:].values

target = data.iloc[:, 0].values
# `etiquetas` holds the sorted distinct labels and `y` the integer code of
# each sample, i.e. etiquetas[y] reproduces target.
etiquetas, y = np.unique(target, return_inverse=True)

# Stratified hold-out split: one third of the samples is kept for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=1/3,
    stratify=y,
)

clf.fit(X_train, y_train)

# Bare expression: the score is computed but neither stored nor printed here.
clf.score(X_test, y_test)


tree.plot_tree(clf)

# Export the fitted tree to DOT text and render it as a PNG image.
dot_data = tree.export_graphviz(
    clf,
    out_file=None,
    feature_names=[
        'Media acc', 'Desv acc', 'Rango acc', 'Sim acc',
        'Media gyr', 'Desv gyr', 'Rango gyr', 'Sim gyr',
    ],
    class_names=etiquetas,
    filled=True,
    rounded=True,
    special_characters=True,
)

graph = pydotplus.graph_from_dot_data(dot_data)

Image(graph.create_png())


# # CROSS-VALIDATION

# Stratified K-fold evaluation. `clf` is refitted on every fold, so after the
# loop it holds the tree trained on the last fold's training split -- that is
# the tree exported below.
K = 10
skf = StratifiedKFold(n_splits=K)

# One row per fold: fold number and accuracy on that fold's test split.
r_tree = pd.DataFrame(columns=['Fold', 'Score'])
for k, (train_index, test_index) in enumerate(skf.split(X, y)):
    clf.fit(X[train_index], y[train_index])
    r_tree.loc[k, :] = [k, clf.score(X[test_index], y[test_index])]
print(r_tree)

# Select the score column by label and cast it to float: positional `[1]`
# indexing on a label-indexed Series is deprecated in pandas 2.x, and the
# columns of a frame filled row by row may be object dtype.
cv_scores = r_tree['Score'].astype(float)
print("MEDIA =", cv_scores.mean(), "\tDESVIACIÓN ESTÁNDAR =", cv_scores.std(), "\n" )

dot_data = tree.export_graphviz(clf, out_file=None,
                                feature_names=['Media acc', 'Desv acc', 'Rango acc', 'Sim acc', 'Media gyr', 'Desv gyr',
                                               'Rango gyr', 'Sim gyr'],
                                class_names=etiquetas, filled=True, rounded=True, special_characters=True)

graph = pydotplus.graph_from_dot_data(dot_data)

Image(graph.create_png())


# # Leave-One-Out

# Leave-one-out evaluation: each split tests on a single sample, so every
# per-fold score is the accuracy on one sample. `clf2` is refitted on each
# split; the tree exported below is the one from the final split.
loo = LeaveOneOut()
print(loo)
clf2 = DecisionTreeClassifier()

# One row per split: split number and score on the held-out sample.
o_tree = pd.DataFrame(columns=['Fold', 'Score'])
for k, (train_index, test_index) in enumerate(loo.split(X)):
    clf2.fit(X[train_index], y[train_index])
    o_tree.loc[k, :] = [k, clf2.score(X[test_index], y[test_index])]
    print("TRAIN:", train_index, "TEST:", test_index)

print(o_tree)

# Select the score column by label and cast it to float: positional `[1]`
# indexing on a label-indexed Series is deprecated in pandas 2.x, and the
# columns of a frame filled row by row may be object dtype.
loo_scores = o_tree['Score'].astype(float)
print("MEDIA =", loo_scores.mean(), "\tDESVIACIÓN ESTÁNDAR =", loo_scores.std(), "\n" )

dot_data = tree.export_graphviz(clf2, out_file=None,
                                feature_names=['Media acc', 'Desv acc', 'Rango acc', 'Sim acc', 'Media gyr', 'Desv gyr',
                                               'Rango gyr', 'Sim gyr'],
                                class_names=etiquetas, filled=True, rounded=True, special_characters=True)

graph = pydotplus.graph_from_dot_data(dot_data)

Image(graph.create_png())


# # Tree to implement

# Final tree: fitted on the complete data set (no held-out samples).
clf_imp = DecisionTreeClassifier()
clf_imp.fit(X, y)
tree.plot_tree(clf_imp)

# Export the fitted tree to DOT text and render it as a PNG image.
dot_data = tree.export_graphviz(
    clf_imp,
    out_file=None,
    feature_names=[
        'Media acc', 'Desv acc', 'Rango acc', 'Sim acc',
        'Media gyr', 'Desv gyr', 'Rango gyr', 'Sim gyr',
    ],
    class_names=etiquetas,
    filled=True,
    rounded=True,
    special_characters=True,
)

graph = pydotplus.graph_from_dot_data(dot_data)

Image(graph.create_png())
