import random
import numpy as np
import matplotlib.pyplot as plt
import pickle,pprint
from classifier.knn import KNearestNeighbor
from classifier.svm import LinearSVM
from classifier.softmax import Softmax
from classifier.mlp import MultiLayerPerceptron
print("hello nut")

# Load the training set. Only elements 0 and 1 of the unpickled object are
# read: element 0 becomes the sample array, element 1 the label array.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only point this at dataset files from a trusted source.
with open('dataset/train_forstu.pickle', 'rb') as pkl_file:
    # `with` closes the file even when unpickling raises.
    train = pickle.load(pkl_file)
train_x = np.array(train[0])
train_y = np.array(train[1])
print(train_x.shape)
print(train_y.shape)
############################################################################
#########   type in the path of test dataset in the next line    ###########
############################################################################
valid_path = 'dataset/valid_forstu.pickle'
############################################################################
#########   type in the path of test dataset in the above line   ###########
############################################################################
# Load the evaluation set. Only elements 0 and 1 of the unpickled object are
# read: element 0 becomes the sample array, element 1 the label array.
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only point this at dataset files from a trusted source.
with open(valid_path, 'rb') as pkl_file:
    # `with` closes the file even when unpickling raises.
    valid = pickle.load(pkl_file)
# Wrap in np.array, exactly as the training data is, so the `.shape` / `.min()`
# calls below (and later array arithmetic) also work when the pickle stores
# plain Python lists.
valid_x = np.array(valid[0])
valid_y = np.array(valid[1])
print("%s %s" % (valid_x.shape, valid_x.min()))
print("%s %s" % (valid_y.shape, valid_y.min()))
# ---------------------------------------------------------------------------
# Classifier selection: edit the assignment on the next line to pick the
# model that the rest of the script trains and evaluates.
# Accepted values: 'knn', 'svm', 'softmax', 'mlp'.
# ---------------------------------------------------------------------------
model = 'mlp'
# ---------------------------------------------------------------------------
# End of classifier selection (edit the assignment on the line above).
# ---------------------------------------------------------------------------
def _write_predictions(path, predictions, count):
    """Write the first `count` entries of `predictions` to `path`, one per line."""
    # `with` guarantees the file is flushed and closed even if a write fails.
    with open(path, 'w') as result_file:
        result_file.writelines(
            str(predictions[i]) + '\n' for i in range(count))


def _center_and_add_bias(train_data, valid_data):
    """Subtract the training mean from both sets and append a column of ones.

    Returns the transformed (train, valid) pair; the inputs are not modified.
    """
    mean_x = np.mean(train_data, axis=0)
    print("mean_x %s" % (mean_x.shape,))
    # Out-of-place subtraction on purpose: `data -= mean_x` raises a casting
    # error (or silently truncates on old NumPy) when the data has an integer
    # dtype, because the float result cannot be stored back in place.
    train_data = train_data - mean_x
    valid_data = valid_data - mean_x
    # The extra all-ones column lets a linear classifier absorb its bias term
    # into the weight matrix.
    train_data = np.hstack([train_data, np.ones((train_data.shape[0], 1))])
    valid_data = np.hstack([valid_data, np.ones((valid_data.shape[0], 1))])
    print("%s %s" % (train_data.shape, valid_data.shape))
    return train_data, valid_data


def _one_hot(labels, num_classes):
    """Return a float32 (len(labels), num_classes) one-hot encoding of `labels`."""
    labels = np.asarray(labels)
    encoded = np.zeros((labels.shape[0], num_classes), dtype='float32')
    # ravel() keeps this correct for column-vector labels of shape (N, 1).
    encoded[np.arange(labels.shape[0]), labels.astype(int).ravel()] = 1
    return encoded


def _plot_loss(history):
    """Show a loss-versus-iteration curve for one training run."""
    plt.plot(history)
    plt.xlabel('Iteration number')
    plt.ylabel('Loss value')
    plt.show()


# Train the classifier chosen in `model`, report its accuracy on the
# evaluation set, and write one predicted label per line to predict/<model>_predict.txt.
if model == 'knn':
    classifier = KNearestNeighbor()
    classifier.train(train_x, train_y)
    pred_y = classifier.predict(valid_x, k=10)
    num_correct = np.sum(pred_y == valid_y)
    accuracy = float(num_correct) / valid_y.shape[0]
    print('Got %d / %d correct using knn => accuracy: %f'
          % (num_correct, valid_y.shape[0], accuracy))
    _write_predictions('predict/knn_predict.txt', pred_y, valid_y.shape[0])
elif model == 'svm':
    train_x, valid_x = _center_and_add_bias(train_x, valid_x)
    classifier = LinearSVM()
    history1 = classifier.train(train_x, train_y, learning_rate=1e-7,
                                reg=5e4, num_iters=1000, batch_size=200,
                                verbose=True)
    # Second training stage with a smaller learning rate; its return value is
    # not used (only the first stage is plotted below).
    classifier.train(train_x, train_y, learning_rate=1e-9, reg=5e4,
                     num_iters=1000, batch_size=200, verbose=True)
    y_train_pred = classifier.predict(train_x)
    print('training accuracy: %f' % (np.mean(train_y == y_train_pred), ))
    y_val_pred = classifier.predict(valid_x)
    print('validation accuracy: %f using svm'
          % (np.mean(valid_y == y_val_pred), ))
    _write_predictions('predict/svm_predict.txt', y_val_pred,
                       valid_y.shape[0])
    _plot_loss(history1)
elif model == 'softmax':
    train_x, valid_x = _center_and_add_bias(train_x, valid_x)
    classifier = Softmax()
    history1 = classifier.train(train_x, train_y, learning_rate=1e-7,
                                reg=5e4, num_iters=2000, batch_size=200,
                                verbose=True)
    y_train_pred = classifier.predict(train_x)
    print('training accuracy: %f' % (np.mean(train_y == y_train_pred), ))
    y_val_pred = classifier.predict(valid_x)
    print('validation accuracy: %f using softmax'
          % (np.mean(valid_y == y_val_pred), ))
    _write_predictions('predict/softmax_predict.txt', y_val_pred,
                       valid_y.shape[0])
    _plot_loss(history1)
elif model == 'mlp':
    # The MLP receives the inputs as loaded (no centering or scaling here)
    # and one-hot encoded targets.
    num_classes = 6
    train_y1 = _one_hot(train_y, num_classes)
    valid_y1 = _one_hot(valid_y, num_classes)
    print("x  %s %s" % (train_x.shape, valid_x.shape))
    print("y  %s %s" % (train_y.shape, valid_y.shape))
    classifier = MultiLayerPerceptron()
    pred_y, loss_history_tr, loss_history_va = classifier.run_mlp(
        train_x, train_y1, valid_x, valid_y1, num_iters=1000, verbose=True)
    print(pred_y.shape)
    print('validation accuracy: %f using mlp' % (np.mean(valid_y == pred_y), ))
    _write_predictions('predict/mlp_predict.txt', pred_y, valid_y.shape[0])
    # NOTE(review): the variables are named loss_history_* but the curves are
    # labelled as accuracy; confirm against run_mlp which one it returns.
    plt.plot(loss_history_tr, "g-", label="train accuracy")
    plt.plot(loss_history_va, "r-.", label="valid accuracy")
    plt.xlabel('Iteration number')
    plt.ylabel('accuracy')
    plt.legend()
    plt.show()
else:
    print("please check the model type, they should be from 'knn','svm','softmax','mlp'")