"""
Tests for k-modes clustering algorithm
"""
import pickle
import unittest
import numpy as np
from nose.tools import assert_equal
from kmodes.kmodes import KModes
from kmodes.util.dissim import ng_dissim, jaccard_dissim_binary, jaccard_dissim_label
# Labelled fixture: 47 records, each with 35 integer-coded categorical
# features followed by a class label ('D1'..'D4') in the last column.
# NOTE(review): looks like the UCI "soybean (small)" data set -- confirm.
#
# Because every row mixes ints with a string label, NumPy builds a
# string-dtype array: the feature values are stored as '4', '0', ... and
# not as integers, and they stay strings after the label column is dropped.
SOYBEAN = np.array([
# Class D1 (10 records)
[4, 0, 2, 1, 1, 1, 0, 1, 0, 2, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 3, 1, 1, 1, 0, 0, 0, 0,
4, 0, 0, 0, 0, 0, 0, 'D1'],
[5, 0, 2, 1, 0, 3, 1, 1, 1, 2, 1, 1, 0, 2, 2, 0, 0, 0, 1, 1, 3, 0, 1, 1, 0, 0, 0, 0,
4, 0, 0, 0, 0, 0, 0, 'D1'],
[3, 0, 2, 1, 0, 2, 0, 2, 1, 1, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 3, 0, 1, 1, 0, 0, 0, 0,
4, 0, 0, 0, 0, 0, 0, 'D1'],
[6, 0, 2, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 1, 3, 1, 1, 1, 0, 0, 0, 0,
4, 0, 0, 0, 0, 0, 0, 'D1'],
[4, 0, 2, 1, 0, 3, 0, 2, 0, 2, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 3, 1, 1, 1, 0, 0, 0, 0,
4, 0, 0, 0, 0, 0, 0, 'D1'],
[5, 0, 2, 1, 0, 2, 0, 1, 1, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 1, 3, 1, 1, 1, 0, 0, 0, 0,
4, 0, 0, 0, 0, 0, 0, 'D1'],
[3, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 1, 0, 2, 2, 0, 0, 0, 1, 1, 3, 0, 1, 1, 0, 0, 0, 0,
4, 0, 0, 0, 0, 0, 0, 'D1'],
[3, 0, 2, 1, 0, 1, 0, 2, 1, 2, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 3, 0, 1, 1, 0, 0, 0, 0,
4, 0, 0, 0, 0, 0, 0, 'D1'],
[6, 0, 2, 1, 0, 3, 0, 1, 1, 1, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 3, 1, 1, 1, 0, 0, 0, 0,
4, 0, 0, 0, 0, 0, 0, 'D1'],
[6, 0, 2, 1, 0, 1, 0, 1, 0, 2, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 3, 1, 1, 1, 0, 0, 0, 0,
4, 0, 0, 0, 0, 0, 0, 'D1'],
# Class D2 (10 records)
[6, 0, 0, 2, 1, 0, 2, 1, 0, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 1, 0, 3, 0, 0, 0, 2, 1, 0,
4, 0, 0, 0, 0, 0, 0, 'D2'],
[4, 0, 0, 1, 0, 2, 3, 1, 1, 1, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 2, 1, 0,
4, 0, 0, 0, 0, 0, 0, 'D2'],
[5, 0, 0, 2, 0, 3, 2, 1, 0, 2, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 2, 1, 0,
4, 0, 0, 0, 0, 0, 0, 'D2'],
[6, 0, 0, 1, 1, 3, 3, 1, 1, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 2, 1, 0,
4, 0, 0, 0, 0, 0, 0, 'D2'],
[3, 0, 0, 2, 1, 0, 2, 1, 0, 1, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 2, 1, 0,
4, 0, 0, 0, 0, 0, 0, 'D2'],
[4, 0, 0, 1, 1, 1, 3, 1, 1, 1, 1, 1, 0, 2, 2, 0, 0, 0, 1, 1, 0, 3, 0, 0, 0, 2, 1, 0,
4, 0, 0, 0, 0, 0, 0, 'D2'],
[3, 0, 0, 1, 0, 1, 2, 1, 0, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 2, 1, 0,
4, 0, 0, 0, 0, 0, 0, 'D2'],
[5, 0, 0, 2, 1, 2, 2, 1, 0, 2, 1, 1, 0, 2, 2, 0, 0, 0, 1, 1, 0, 3, 0, 0, 0, 2, 1, 0,
4, 0, 0, 0, 0, 0, 0, 'D2'],
[6, 0, 0, 2, 0, 1, 3, 1, 1, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 2, 1, 0,
4, 0, 0, 0, 0, 0, 0, 'D2'],
[5, 0, 0, 2, 1, 3, 3, 1, 1, 2, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 2, 1, 0,
4, 0, 0, 0, 0, 0, 0, 'D2'],
# Class D3 (10 records)
[0, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 2, 2, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 3,
4, 0, 0, 0, 0, 0, 0, 'D3'],
[2, 1, 2, 0, 0, 3, 1, 2, 0, 1, 1, 0, 0, 2, 2, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 0, 'D3'],
[2, 1, 2, 0, 0, 2, 1, 1, 0, 2, 1, 0, 0, 2, 2, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 3,
4, 0, 0, 0, 0, 0, 0, 'D3'],
[0, 1, 2, 0, 0, 0, 1, 1, 1, 2, 1, 0, 0, 2, 2, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 0, 'D3'],
[0, 1, 2, 0, 0, 2, 1, 1, 1, 1, 1, 0, 0, 2, 2, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 0, 'D3'],
[4, 0, 2, 0, 1, 0, 1, 2, 0, 2, 1, 1, 0, 2, 2, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 3,
4, 0, 0, 0, 0, 0, 0, 'D3'],
[2, 1, 2, 0, 0, 3, 1, 2, 0, 2, 1, 0, 0, 2, 2, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 3,
4, 0, 0, 0, 0, 0, 0, 'D3'],
[0, 1, 2, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 2, 2, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D3'],
[3, 0, 2, 0, 1, 3, 1, 2, 0, 1, 1, 0, 0, 2, 2, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 3,
4, 0, 0, 0, 0, 0, 0, 'D3'],
[0, 1, 2, 0, 0, 1, 1, 2, 1, 2, 1, 0, 0, 2, 2, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 0, 'D3'],
# Class D4 (17 records)
[2, 1, 2, 1, 1, 3, 1, 2, 1, 2, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 2, 2, 0, 1, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
[0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 1, 2, 0, 0, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
[3, 1, 2, 0, 0, 1, 1, 2, 1, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 2, 2, 0, 0, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
[2, 1, 2, 1, 1, 1, 1, 2, 0, 2, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 1, 2, 0, 1, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
[1, 1, 2, 0, 0, 3, 1, 1, 1, 2, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 2, 2, 0, 0, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
[1, 1, 2, 1, 0, 0, 1, 2, 1, 1, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 2, 2, 0, 0, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
[0, 1, 2, 1, 0, 3, 1, 1, 0, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 1, 2, 0, 0, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
[2, 1, 2, 0, 0, 1, 1, 2, 0, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 1, 2, 0, 0, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
[3, 1, 2, 0, 0, 2, 1, 2, 1, 1, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 2, 2, 0, 0, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
[3, 1, 1, 0, 0, 2, 1, 2, 1, 2, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 2, 2, 0, 0, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
[0, 1, 2, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 1, 2, 0, 1, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
[1, 1, 2, 1, 1, 3, 1, 2, 0, 1, 1, 1, 0, 2, 2, 0, 0, 0, 1, 1, 1, 2, 0, 1, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
[1, 1, 2, 0, 0, 0, 1, 2, 1, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 2, 2, 0, 0, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
[1, 1, 2, 1, 1, 2, 3, 1, 1, 1, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 2, 2, 0, 1, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
[2, 1, 1, 0, 0, 3, 1, 2, 0, 2, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 1, 2, 0, 0, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
[0, 1, 1, 1, 1, 2, 1, 2, 1, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 1, 2, 2, 0, 1, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
[0, 1, 2, 1, 0, 3, 1, 1, 0, 2, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 1, 2, 0, 0, 0, 0, 0, 3,
4, 0, 0, 0, 0, 0, 1, 'D4'],
])
# Drop the target (class label) column: keep only the first 35 columns,
# i.e. the features. The remaining values are still strings (see above).
SOYBEAN = SOYBEAN[:, :35]
# Second, smaller fixture: four labelled records, one per class 'D1'..'D4',
# in the same 35-features-plus-label layout as SOYBEAN.
# Mixing ints with the string label makes NumPy store every value as a
# string. The trailing slice keeps the first 35 columns, which discards
# the label (target) column straight away.
SOYBEAN2 = np.array([
    [4, 0, 2, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 2, 2, 0, 0, 0, 1, 0, 3, 0, 1, 1, 0, 0, 0, 0,
     4, 0, 0, 0, 0, 0, 0, 'D1'],
    [7, 0, 0, 2, 1, 0, 2, 1, 0, 0, 1, 1, 0, 2, 2, 0, 0, 0, 1, 1, 0, 3, 0, 0, 0, 2, 1, 0,
     4, 0, 0, 0, 0, 0, 0, 'D2'],
    [0, 1, 2, 0, 0, 1, 1, 1, 1, 2, 1, 0, 0, 2, 2, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 3,
     4, 0, 0, 0, 0, 0, 0, 'D3'],
    [2, 1, 2, 1, 1, 3, 1, 2, 1, 1, 1, 1, 0, 2, 2, 0, 0, 0, 1, 1, 1, 2, 0, 1, 0, 0, 0, 3,
     4, 0, 0, 0, 0, 0, 1, 'D4'],
])[:, :35]
# test data with categorical variables that have been label encoded
TEST_DATA = np.array([
[2, 22, 14, 45, 2, 0, 1, 2, 5],
[2, 13, 13, 19, 2, 0, 1, 2, 5],
[3, 25, 4, 3, 0, 1, 2, 0, 4],
[2, 13, 15, 18, 0, 1, 2, 2, 3],
[3, 10, 4, 42, 0, 2, 1, 1, 2],
[2, 16, 21, 14, 0, 1, 2, 2, 2],
[2, 16, 19, 37, 0, 2, 1, 2, 2],
[2, 20, 9, 34, 0, 1, 2, 3, 5],
[2, 14, 21, 44, 0, 1, 2, 3, 2],
[2, 26, 5, 30, 0, 1, 2, 3, 3],
[3, 18, 17, 41, 3, 3, 3, 2, 0],
[2, 20, 1, 27, 3, 3, 3, 2, 0],
[3, 6, 8, 19, 0, 1, 2, 1, 2],
[2, 13, 8, 41, 3, 3, 3, 2, 0],
[2, 18, 17, 41, 3, 3, 3, 2, 0],
[2, 16, 19, 42, 0, 1, 2, 2, 5],
[7, 7, 5, 43, 0, 2, 1, 2, 2],
[2, 18, 17, 41, 3, 3, 3, 2, 0],
[3, 3, 5, 12, 3, 3, 3, 2, 0],
[2, 18, 17, 41, 3, 3, 3, 2, 0],
[7, 15, 19, 17, 0, 1, 2, 2, 2],
[1, 1, 15, 24, 0, 1, 2, 2, 2],
[2, 18, 17, 41, 3, 3, 3, 2, 0],
[2, 5, 7, 9, 0, 1, 2, 3, 5],
[2, 24, 6, 10, 0, 2, 1, 2, 2],
[2, 13, 16, 29, 0, 2, 1, 2, 2],
[3, 6, 8, 1, 0, 1, 2, 2, 5],
[2, 16, 15, 34, 0, 1, 2, 2, 1],
[0, 24, 14, 12, 3, 3, 3, 2, 0],
[3, 8, 21, 13, 3, 3, 3, 2, 0],
[2, 17, 15, 42, 3, 3, 3, 2, 0],
[2, 25, 18