# coding: utf-8
# In[1]:
import numpy as np
from scipy.spatial.distance import cdist
import sklearn.datasets as ds
import math
import matplotlib.pyplot as plt
from sklearn import metrics
# In[2]:
def Nmi(xx, U, culster_number, target=None):
    """Compute the normalized mutual information (NMI) of a fuzzy clustering.

    Every sample is first assigned to the cluster with the largest
    membership; the resulting hard labelling is then compared with the
    reference labels using ``NMI = MI / sqrt(H(reference) * H(predicted))``
    with base-2 logarithms.

    Args:
        xx: Dataset object. ``xx.data`` must be array-like with one row per
            sample (only its row count is used); ``xx.target`` supplies the
            reference labels when ``target`` is None.
        U: Membership matrix indexed as ``U[sample][cluster]``.
        culster_number: Number of leading columns of ``U`` to consider.
        target: Optional reference labels, one per sample. Overrides
            ``xx.target`` when given.

    Returns:
        The NMI score as a float. When either labelling has at most one
        group its entropy is zero and the ratio is undefined; in that case
        1.0 is returned if both labellings are degenerate and 0.0 otherwise
        (instead of the inf/nan a plain division would produce).
    """
    print("this is NMI!")
    total = np.asarray(xx.data).shape[0]

    # Harden the fuzzy partition. np.argmax returns the first maximum, which
    # is what a left-to-right scan with a strict '<' comparison yields.
    # max(..., 1) keeps column 0 as the fallback when no columns are requested.
    memberships = np.asarray(U)[:total, :max(culster_number, 1)]
    R = np.argmax(memberships, axis=1)

    Q = np.asarray(xx.target if target is None else target).reshape(total)

    Q_i = np.unique(Q)
    R_i = np.unique(R)
    Q_c, R_c = len(Q_i), len(R_i)

    # One-hot indicator matrices (groups x samples); their product is the
    # contingency table of reference groups versus predicted clusters.
    idQ = (Q[np.newaxis, :] == Q_i[:, np.newaxis]).astype(float)
    idR = (R[np.newaxis, :] == R_i[:, np.newaxis]).astype(float)
    idQR = idQ.dot(idR.T)
    print(idQR)

    if Q_c <= 1 or R_c <= 1:
        # Zero entropy on at least one side: avoid dividing by zero.
        NMI = 1.0 if (Q_c <= 1 and R_c <= 1) else 0.0
    else:
        Pq = idQR.sum(axis=1) / total
        Pr = idQR.sum(axis=0) / total
        Pqr = idQR / total

        # Entropies of the two labellings (every marginal is > 0 here).
        Hq = -np.sum(Pq * np.log2(Pq))
        Hr = -np.sum(Pr * np.log2(Pr))

        # eps keeps the logarithm finite for empty contingency cells; those
        # cells still contribute 0 because they are multiplied by Pqr == 0.
        eps = 2.2204e-16
        MI = np.sum(Pqr * np.log2(Pqr / np.outer(Pq, Pr) + eps))
        NMI = float(MI / np.sqrt(Hq * Hr))

    print("NMI=%lf"%NMI)
    return NMI
# In[3]:
def plot(self, v, u, c, labels=None):
    """Scatter-plot features 2 and 3 of a clustered data set with its centers.

    Args:
        self: Data indexed as ``self[feature]``, each entry being a per-sample
            array (a plain positional argument despite its name).
        v: Iterable of cluster centers; entries 2 and 3 of each are drawn.
        u: Membership matrix with clusters along axis 0; every sample is
            assigned to its arg-max cluster.
        c: Number of clusters to draw.
        labels: Unused; kept for interface compatibility.
    """
    print("-"*30+"begin drawing ... ...")
    ax = plt.subplots()[1]

    # Hard cluster assignment of every sample.
    cluster_membership = np.argmax(u, axis=0)

    marker_lab = ['*', 'x', 'o']
    color_lab = ['bs', 'rs', 'gs']

    for j in range(c):
        members = cluster_membership == j
        ax.scatter(
            self[2][members],
            self[3][members],
            alpha=0.8,
            # Cycle through the styles so more than three clusters do not
            # run off the end of the list.
            marker=marker_lab[j % len(marker_lab)],
            edgecolors="none")

    # Mark the center of each fuzzy cluster.
    for k, pt in enumerate(v):
        ax.plot(pt[2], pt[3], color_lab[k % len(color_lab)])

    ax.grid(True)
    plt.show()
# In[4]:
def _object(u, d, m):
d2 = d ** 2
um = u ** m
j = np.sum(um*d2)
return j
# In[5]:
def _fcm_criterion(d, m, metric):
exp = -2. / (m - 1)
d2 = d ** exp
ds = np.sum(d2, axis=0, keepdims=1)
u = d2 / ds
# print("_fcm_criterion-->d2:{},ds:{},u:{}".format(d2.shape, ds.shape, u.shape))
return u
def _update_clusters(x, u, m):
um = u **m
ux = um.dot(x.T)
us = np.atleast_2d(um.sum(axis=1)).T
# print("_update_clusters-->um:{},ux:{},us:{}".format(um.shape, ux.shape, us.shape))
v = ux / us
# print("_update_clusters-->v:{}".format(v.shape))
return v
def cmean(x, c, m, e, max_iterations, v0=None, metric="euclidean" ):
    """Run fuzzy c-means clustering.

    Args:
        x: Data array of shape (features, samples).
        c: Number of clusters.
        m: Exponent forwarded to the membership, center and objective
            updates.
        e: Stop as soon as the objective changes by less than ``e`` between
            two consecutive iterations.
        max_iterations: Length of the history buffers; at most
            ``max_iterations - 1`` update steps are performed.
        v0: Optional initial centers of shape (c, features). When None,
            ``c`` distinct samples are drawn after seeding NumPy's global
            random generator with 1.
        metric: Distance metric name forwarded to ``cdist``.

    Returns:
        Tuple ``(v, v0, u, u0, t, j)``: the centers ``v[t]``, the initial
        centers, the memberships ``u[t - 1]`` from which ``v[t]`` was
        computed, the first memberships, the index ``t`` at which the loop
        stopped, and the objective history ``j`` (``j[0]`` is 0 and
        ``j[k + 1]`` is the objective of step ``k``).
    """
    x = np.asarray(x)
    # Num Features, Datapoints
    S, N = x.shape

    # If the user doesn't provide their own starting points, pick random
    # samples from the data set.
    if v0 is None:
        # NOTE: this reseeds NumPy's global RNG on every call.
        np.random.seed(1)
        v0 = x.T[np.random.choice(N, c, replace=False), :]

    # np.finfo only accepts inexact dtypes; fall back to double precision so
    # integer-typed data does not raise.
    if np.issubdtype(x.dtype, np.inexact):
        eps = np.finfo(x.dtype).eps
    else:
        eps = np.finfo(float).eps

    v = np.empty((max_iterations, c, S))
    v[0] = np.array(v0)
    # Membership of each data point in each cluster, per iteration.
    u = np.zeros((max_iterations, c, N))
    j = np.zeros(max_iterations, dtype="float64")

    t = 0
    while t < max_iterations - 1:
        # Sanitize distances (avoid zeroes, which have no negative power).
        d = np.fmax(cdist(x.T, v[t], metric=metric).T, eps)
        # Membership update
        u[t] = _fcm_criterion(d, m, metric)
        # Center update
        v[t + 1] = _update_clusters(x, u[t], m)
        j[t + 1] = _object(u[t], d, m)
        # Stopping criterion
        if abs(j[t + 1] - j[t]) < e:
            break
        t += 1

    # Clamp the index: when the loop stops at t == 0, u[t - 1] would wrap
    # around to the last, never-filled (all-zero) slot of the buffer.
    return v[t], v[0], u[max(t - 1, 0)], u[0], t, j
# In[6]:
# Driver: cluster the Iris data with fuzzy c-means, score it and plot it.
# The four settings below are passed to cmean() as m, e, max_iterations and
# the cluster count c respectively.
fuzzifier = 1.5
error = 0.0001
maxiter = 100
c = 3
# np.random.seed(100)
# In[12]:
iris0 = ds.load_iris()
labels = iris0.target_names
target = iris0.target
# cmean() unpacks x.shape as (features, samples), hence the transpose.
iris = np.array(iris0.data).T
# v, v0, u, u0, d, t = cmeans.fcm(iris, c, fuzzifier, error, maxiter)
v, v0, u, u0, t, j= cmean(iris, c, fuzzifier, error, maxiter)
# Back to (samples, features); the plotting calls transpose again themselves.
iris = iris.T
print("Iris")
print(v)
# Nmi() indexes its membership matrix as [sample][cluster], so pass u.T.
Nmi(iris0, u.T, c)
# Cross-check with scikit-learn's NMI on the arg-max cluster assignment.
u_max = np.argmax(u, axis=0)
print(metrics.normalized_mutual_info_score(u_max, target))
# Disabled debug output: centers, stop index and objective history.
# print(v)
# print(t)
# for i in range(t+3):
# print("%3f"%j[i])
# In[8]:
# plot() indexes its first argument by feature, so pass (features, samples).
plot(iris.T, v, u, c )
# In[15]:
import sklearn.datasets as ds
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
def plot_3D(self, v, u, cluster, a, b, c,labels=None):
    """Draw a 3-D scatter plot of three features of a clustered data set.

    Args:
        self: Data indexed as ``self[feature]``, each entry being a per-sample
            array (a plain positional argument despite its name).
        v: Iterable of cluster centers, each indexable by feature.
        u: Membership matrix with clusters along axis 0; every sample is
            assigned to its arg-max cluster.
        cluster: Number of clusters to draw.
        a: Feature index plotted on the x axis.
        b: Feature index plotted on the y axis.
        c: Feature index plotted on the z axis.
        labels: Unused; kept for interface compatibility.
    """
    print("-"*30+"begin drawing ... ...")
    # Create the three-dimensional axes.
    ax = plt.subplot(111, projection='3d')

    # Hard cluster assignment of every sample.
    cluster_membership = np.argmax(u, axis=0)

    marker_lab = ['^', 'x', 'o', 'd', '*', 'h', '.']

    for j in range(cluster):
        members = cluster_membership == j
        ax.scatter(
            self[a][members],
            self[b][members],
            self[c][members],
            alpha=0.5,
            # Cycle through the markers so more clusters than markers do not
            # run off the end of the list.
            marker=marker_lab[j % len(marker_lab)],
        )

    # Mark the center of each fuzzy cluster with a red square. The style is
    # given by keyword: the fourth positional argument of the 3-D scatter is
    # ``zdir``, so a "rs" format string passed there is not a marker spec.
    for pt in v:
        ax.scatter(pt[a], pt[b], pt[c], color="r", marker="s")

    ax.grid(True)
    plt.show()
# Draw features 1, 0 and 2 on the x, y and z axes of the 3-D scatter plot.
plot_3D(iris.T, v, u, c, 1, 0, 2)
# Comments: 0  (apparently leftover page text, not part of the program)