FCM_iris.rar_fcmpython_fcm算法_iris_python_聚类

共1个文件

py：1个

版权申诉

170 浏览量 2022-07-14 18:59:04 上传评论收藏 2KB RAR 举报

FCM（Fuzzy C-Means）模糊C均值聚类算法是一种在数据挖掘和机器学习领域广泛应用的聚类方法。它与经典的K-Means算法相似，但区别在于它允许数据点同时属于多个类别，这使得FCM对噪声和不规则形状的数据集具有更好的适应性。在本案例中，我们关注的是如何用Python实现FCM算法，并在Iris数据集上进行演示。 Iris数据集是机器学习领域的一个经典示例，由统计学家R.A. Fisher于1936年提出，包含了三种不同鸢尾花（Setosa, Versicolour, Virginica）的四个特征：花萼长度、花萼宽度、花瓣长度和花瓣宽度。这个数据集常用于分类和聚类算法的验证。在FCM_iris.py文件中，我们可以预期看到以下关键知识点： 1. 数据预处理：需要导入必要的库，如numpy、pandas和matplotlib，用于数据操作和可视化。接着，读取Iris数据集，通常使用sklearn.datasets库中的load_iris函数。数据集会被转化为二维数组，以便进行后续处理。 2. FCM算法实现：FCM算法的核心包括初始化、迭代更新和终止条件。初始化阶段，随机选择C个质心（cluster centers），每个数据点被分配一个模糊隶属度。然后，通过迭代更新数据点的隶属度和质心，直到满足停止条件（如达到最大迭代次数或目标函数/隶属度的变化小于阈值）。更新公式如下： - 数据点i到质心j的隶属度：\( u_{ij} = \frac{1}{\sum_{k=1}^{C}\left(\frac{\|x_i - c_j\|}{\|x_i - c_k\|}\right)^{\frac{2}{m-1}}} \)，其中 \( \|x_i - c_j\| \) 是数据点i到质心j的距离，且对每个数据点有 \( \sum_{j=1}^{C} u_{ij} = 1 \) - 质心c_j的更新：\( c_j = \frac{\sum_{i=1}^{N}u_{ij}^m x_i}{\sum_{i=1}^{N}u_{ij}^m} \) 3. 参数设置：FCM算法有两个主要参数，C（簇的数量）和m（模糊因子）。m决定了聚类的模糊程度：m必须大于1（m=1时指数2/(m-1)无定义），m越大结果越模糊，m趋近于1时结果趋近于K-Means的硬划分。通常，m的值会在1（不含）到2之间选择，m=2是最常见的取值。 4. 结果评估：聚类效果可以通过各种指标评估，如轮廓系数、Davies-Bouldin指数等。这些指标可以量化聚类的紧密度和分离度。 5. 可视化：使用matplotlib等库绘制二维或三维散点图，展示不同颜色代表不同的簇，帮助直观理解聚类结果。通过这个Python代码，你可以学习到如何将FCM算法应用于实际数据，以及如何对结果进行分析和可视化。这对于理解聚类算法和提高数据分析能力都非常有帮助。

资源详情

资源评论

资源推荐

收起资源包目录

FCM_iris.rar （1个子文件）

FCM_iris.py 7KB

# coding: utf-8
"""Fuzzy C-Means (FCM) clustering, demonstrated on the Iris data set.

The clustering core (``cmean`` and its helpers) and the ``Nmi`` score need only
NumPy and SciPy.  matplotlib and scikit-learn are imported lazily inside the
plotting helpers and ``main`` so that the core stays importable without them.
"""

import math

import numpy as np
from scipy.spatial.distance import cdist

# Settings used by the demo in ``main``.
FUZZIFIER = 1.5
ERROR_TOLERANCE = 0.0001
MAX_ITERATIONS = 100
N_CLUSTERS = 3


def Nmi(xx, U, culster_number, target=None):
    """Print and return the normalised mutual information of a fuzzy partition.

    Every sample is hardened to the cluster with the largest membership (the
    first one on ties) and compared with the reference labels.  The score is
    ``MI / sqrt(H_ref * H_pred)`` using base-2 logarithms.

    Parameters
    ----------
    xx : object
        Data set object.  ``xx.data`` must have one row per sample;
        ``xx.target`` supplies the reference labels when ``target`` is None.
    U : array-like, shape (n_samples, >= culster_number)
        Membership of every sample in every cluster.
    culster_number : int
        Number of leading columns of ``U`` to consider.
    target : array-like of length n_samples, optional
        Reference labels overriding ``xx.target``.

    Returns
    -------
    numpy.float64
        The score.  If either labelling contains a single class its entropy is
        zero and the result is not finite.
    """
    print("this is NMI!")
    n_samples = np.asarray(xx.data).shape[0]

    # Harden the fuzzy partition; argmax keeps the first maximum on ties.
    memberships = np.asarray(U)[:n_samples, :culster_number]
    predicted = np.argmax(memberships, axis=1)

    reference = xx.target if target is None else target
    reference = np.asarray(reference).reshape(n_samples)

    # One-hot indicator rows per distinct label, then the contingency table.
    ref_values = np.unique(reference)
    pred_values = np.unique(predicted)
    idQ = (reference[np.newaxis, :] == ref_values[:, np.newaxis]) + 0.0
    idR = (predicted[np.newaxis, :] == pred_values[:, np.newaxis]) + 0.0
    idQR = idQ.dot(idR.T)
    print(idQR)

    Pq = idQ.sum(axis=1) / n_samples
    Pr = idR.sum(axis=1) / n_samples
    Pqr = idQR / n_samples

    # Sum of p*log2(p), i.e. the negated entropy; the two signs cancel in the
    # product under the square root below.
    Hq = np.sum(Pq * np.log2(Pq))
    Hr = np.sum(Pr * np.log2(Pr))

    # eps keeps log2 finite for empty cells, whose term is then 0 * log2(eps).
    eps = 2.2204e-16
    MI = np.sum(Pqr * np.log2(Pqr / np.outer(Pq, Pr) + eps))

    NMI = MI / math.sqrt(Hq * Hr)
    print("NMI=%lf" % NMI)
    return NMI


def plot(self, v, u, c, labels=None):
    """Draw a 2-D scatter plot of the clusters using features 2 and 3.

    Parameters
    ----------
    self : array-like, shape (n_features, n_samples)
        Data to plot; needs at least four feature rows.
    v : array-like, shape (n_centers, n_features)
        Cluster centres, drawn as coloured squares.
    u : array-like, shape (n_clusters, n_samples)
        Membership matrix; each sample is drawn with its arg-max cluster.
    c : int
        Number of clusters to draw.
    labels : optional
        Accepted for interface compatibility; not used.
    """
    import matplotlib.pyplot as plt  # lazy: only needed for drawing

    print("-" * 30 + "begin drawing ... ...")
    ax = plt.subplots()[1]

    cluster_membership = np.argmax(u, axis=0)
    marker_lab = ['*', 'x', 'o']
    color_lab = ['bs', 'rs', 'gs']

    # Cycle through the markers so more than three clusters can be drawn.
    for j in range(c):
        members = cluster_membership == j
        ax.scatter(
            self[2][members],
            self[3][members],
            alpha=0.8,
            marker=marker_lab[j % len(marker_lab)],
            edgecolors="none",
        )

    # Mark the centre of each fuzzy cluster.
    for k, pt in enumerate(v):
        ax.plot(pt[2], pt[3], color_lab[k % len(color_lab)])

    ax.grid(True)
    plt.show()


def _object(u, d, m):
    """Return the FCM objective: the sum over all entries of ``u**m * d**2``."""
    return np.sum((u ** m) * (d ** 2))


def _fcm_criterion(d, m, metric):
    """Return the membership matrix for the distance matrix ``d``.

    Parameters
    ----------
    d : ndarray, shape (n_clusters, n_samples)
        Strictly positive distances from every centre to every sample.
    m : float
        Fuzzifier; ``m == 1`` raises ZeroDivisionError.
    metric : str
        Accepted for interface compatibility; not used.

    Returns
    -------
    ndarray, shape (n_clusters, n_samples)
        Memberships; every column sums to 1.
    """
    exponent = -2.0 / (m - 1)
    # Scale each column by its smallest distance before raising to the
    # (negative) exponent.  The factor cancels in the normalisation, but it
    # keeps the powers within (0, 1] so they cannot overflow when m is near 1.
    scaled = d / np.min(d, axis=0, keepdims=True)
    weights = scaled ** exponent
    return weights / np.sum(weights, axis=0, keepdims=True)


def _update_clusters(x, u, m):
    """Return the centres as the ``u**m``-weighted means of the samples.

    ``x`` is (n_features, n_samples) and ``u`` is (n_clusters, n_samples); the
    result is (n_clusters, n_features).
    """
    um = u ** m
    weighted_sum = um.dot(x.T)
    weight_total = np.atleast_2d(um.sum(axis=1)).T
    return weighted_sum / weight_total


def cmean(x, c, m, e, max_iterations, v0=None, metric="euclidean"):
    """Cluster ``x`` with the Fuzzy C-Means algorithm.

    Parameters
    ----------
    x : array-like, shape (n_features, n_samples)
        Data to cluster; converted to float.
    c : int
        Number of clusters.
    m : float
        Fuzzifier, must be greater than 1.
    e : float
        Iteration stops once the objective changes by less than ``e``.
    max_iterations : int
        Size of the history buffers; at most ``max_iterations - 1`` updates
        are performed.
    v0 : array-like, shape (c, n_features), optional
        Initial centres.  By default ``c`` distinct samples are drawn with a
        fixed seed, so repeated calls start from the same centres.
    metric : str
        Distance metric handed to ``scipy.spatial.distance.cdist``.

    Returns
    -------
    v : ndarray, shape (c, n_features)
        Final cluster centres.
    v0 : ndarray, shape (c, n_features)
        Initial cluster centres.
    u : ndarray, shape (c, n_samples)
        Membership matrix the final centres were computed from.
    u0 : ndarray, shape (c, n_samples)
        Membership matrix of the first iteration.
    t : int
        Index of the iteration at which the loop stopped.
    j : ndarray, shape (max_iterations,)
        Objective history; ``j[0]`` is a zero placeholder.

    Raises
    ------
    ValueError
        If ``m`` is not greater than 1.
    """
    if m <= 1:
        raise ValueError("fuzzifier m must be greater than 1, got %r" % (m,))

    # Float conversion: np.finfo below rejects integer dtypes.
    x = np.asarray(x, dtype=float)
    n_features, n_samples = x.shape

    if v0 is None:
        # A private RandomState seeded with 1 draws the same samples as
        # seeding the global generator would, without clobbering its state.
        rng = np.random.RandomState(1)
        v0 = x.T[rng.choice(n_samples, c, replace=False), :]

    # Per-iteration history of centres, memberships and objective values.
    v = np.empty((max_iterations, c, n_features))
    v[0] = np.asarray(v0, dtype=float)
    u = np.zeros((max_iterations, c, n_samples))
    j = np.zeros(max_iterations, dtype="float64")

    smallest = np.finfo(x.dtype).eps
    t = 0
    while t < max_iterations - 1:
        d = cdist(x.T, v[t], metric=metric).T
        # Sanitise distances: a sample sitting exactly on a centre would
        # otherwise produce a zero distance and a division by zero.
        d = np.fmax(d, smallest)

        u[t] = _fcm_criterion(d, m, metric)
        v[t + 1] = _update_clusters(x, u[t], m)
        j[t + 1] = _object(u[t], d, m)

        # Stopping criterion.
        if abs(j[t + 1] - j[t]) < e:
            break
        t += 1

    # v[t] was computed from u[t - 1].  When the loop stops in its very first
    # pass (t == 0) there is no earlier slot, and index -1 would silently pick
    # the untouched all-zero last slot, so fall back to u[0].
    return v[t], v[0], u[max(t - 1, 0)], u[0], t, j


def plot_3D(self, v, u, cluster, a, b, c, labels=None):
    """Draw a 3-D scatter plot of the clusters.

    Parameters
    ----------
    self : array-like, shape (n_features, n_samples)
        Data to plot.
    v : array-like, shape (n_centers, n_features)
        Cluster centres, drawn as red squares.
    u : array-like, shape (n_clusters, n_samples)
        Membership matrix; each sample is drawn with its arg-max cluster.
    cluster : int
        Number of clusters to draw.
    a, b, c : int
        Feature indices used for the x, y and z axes.
    labels : optional
        Accepted for interface compatibility; not used.
    """
    import matplotlib.pyplot as plt  # lazy: only needed for drawing
    # Imported for its side effect: older matplotlib releases register the
    # '3d' projection only when this module is loaded.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

    feature_a = a
    feature_b = b
    feature_c = c

    print("-" * 30 + "begin drawing ... ...")
    ax = plt.subplot(111, projection='3d')  # create a 3-D axes

    cluster_membership = np.argmax(u, axis=0)
    marker_lab = ['^', 'x', 'o', 'd', '*', 'h', '.']

    for j in range(cluster):
        members = cluster_membership == j
        ax.scatter(
            self[feature_a][members],
            self[feature_b][members],
            self[feature_c][members],
            alpha=0.5,
            marker=marker_lab[j % len(marker_lab)],
        )

    # Mark the centre of each fuzzy cluster.  The style must be passed by
    # keyword: the fourth positional argument of the 3-D scatter is ``zdir``,
    # not a format string.
    for pt in v:
        ax.scatter(
            pt[feature_a], pt[feature_b], pt[feature_c],
            color="r", marker="s",
        )

    ax.grid(True)
    plt.show()


def main():
    """Cluster the Iris data set with FCM, print the scores and plot the result."""
    import sklearn.datasets as ds  # lazy: only the demo needs scikit-learn
    from sklearn import metrics

    iris0 = ds.load_iris()
    target = iris0.target
    iris = np.array(iris0.data).T  # features x samples, as cmean expects

    v, v0, u, u0, t, j = cmean(
        iris, N_CLUSTERS, FUZZIFIER, ERROR_TOLERANCE, MAX_ITERATIONS
    )

    print("Iris")
    print(v)
    Nmi(iris0, u.T, N_CLUSTERS)
    u_max = np.argmax(u, axis=0)
    print(metrics.normalized_mutual_info_score(u_max, target))

    plot(iris, v, u, N_CLUSTERS)
    plot_3D(iris, v, u, N_CLUSTERS, 1, 0, 2)


if __name__ == "__main__":
    main()