import numpy as np
import pandas as pd
from pandas import Series,DataFrame
import matplotlib.pyplot as plt
def getdata(data):
    """Load a tab-separated numeric text file into a NumPy array.

    Every non-blank line is stripped of surrounding whitespace, split on
    tab characters, and each field is converted with ``float``.

    Args:
        data: Path of the text file to read (passed straight to ``open``).

    Returns:
        ``np.ndarray`` built from the list of parsed rows; an empty array
        when the file contains no data lines.

    Raises:
        ValueError: If a field cannot be converted to ``float``.
    """
    rows = []
    with open(data) as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (typically a trailing newline at end of file)
                # hold no data; float('') would raise ValueError on them.
                continue
            rows.append([float(field) for field in line.split('\t')])
    return np.array(rows)
def dist(A, B):
    """Return the Euclidean distance between ``A`` and ``B``.

    The difference ``A - B`` is squared element-wise, summed over all
    elements, and the square root of that sum is returned.
    """
    delta = A - B
    squared = np.power(delta, 2)
    total = np.sum(squared)
    return np.sqrt(total)
def randcent(data, k):
    """Draw ``k`` random initial centroids inside the bounding box of ``data``.

    Each coordinate of each centroid is ``min + (max - min) * r`` where
    ``min``/``max`` are taken over the matching column of ``data`` and ``r``
    comes from ``np.random.rand``.

    Args:
        data: Two-dimensional array-like; rows are samples, columns are
            features. Lists of lists are accepted as well as arrays.
        k: Number of centroids to generate.

    Returns:
        Float ``np.ndarray`` with ``k`` rows and one column per column of
        ``data``.
    """
    points = np.asarray(data)
    n = points.shape[1]
    lows = points.min(axis=0)
    spans = (points.max(axis=0) - lows).astype(float)
    # Draw as (n, k) and transpose: the global RNG is then consumed one
    # feature column at a time, the same order as k draws per column.
    draws = np.random.rand(n, k).T
    return np.asarray(lows + spans * draws, dtype=float)
def KMeans(data, k, n, dist=dist, creatcent=randcent):
    """Cluster the rows of ``data`` into ``k`` groups with k-means.

    Runs at most ``n`` rounds of (1) assigning every row to its nearest
    centroid and (2) moving every centroid to the mean of the rows assigned
    to it. Iteration stops early once no centroid moves, because further
    rounds would reproduce the same labels.

    Args:
        data: Two-dimensional array-like; one sample per row.
        k: Number of clusters.
        n: Maximum number of assignment/update rounds. With ``n <= 0`` no
            round runs and every label stays ``0``.
        dist: Callable ``dist(row, centroid)`` returning a distance.
        creatcent: Callable ``creatcent(data, k)`` returning the ``k``
            initial centroids.

    Returns:
        ``np.ndarray`` of shape ``(m, 1)`` (``m`` = number of rows) holding
        the index of the cluster each row was assigned to in the last
        assignment step. A row keeps label ``-1`` if no centroid compared
        as closer than infinity (e.g. ``k == 0``).
    """
    points = np.asarray(data)
    m = np.shape(points)[0]
    labelmat = np.zeros((m, 1))
    # Work on a float copy so centroid means are not truncated and the
    # array returned by ``creatcent`` is never mutated.
    cent = np.array(creatcent(points, k), dtype=float)

    num = 0
    while num < n:
        num += 1

        # Assignment step: label each row with its nearest centroid.
        for i in range(m):
            mindist = np.inf
            minindex = -1
            for j in range(k):
                distj = dist(points[i], cent[j])
                if distj < mindist:
                    mindist = distj
                    minindex = j
            labelmat[i, 0] = minindex

        # Update step: move each centroid to the mean of its members.
        # Without this the loop would just repeat the same assignment.
        moved = False
        for a in range(k):
            members = points[labelmat[:, 0] == a]
            if len(members) == 0:
                # The mean of no rows is NaN; keep the previous centroid.
                continue
            new_center = np.mean(members, axis=0)
            if not np.array_equal(new_center, cent[a]):
                cent[a] = new_center
                moved = True

        if not moved:
            # Converged: another round would yield identical labels.
            break

    return labelmat
西电数据挖掘作业——kmeans图片聚类python实现
需积分: 49 67 浏览量
2018-03-13
15:00:41
上传
评论 12
收藏 22KB RAR 举报
qq_40114263
- 粉丝: 12
- 资源: 14
最新资源
- 基于JAVA-JSP的现代数字化电子政务网管理系统论文-20240403更新
- 西部钻探工程有限公司安全环保问责管理办法.doc
- Using Model-Based Design to Develop SOA Applications
- PHP Decrypt跨平台解密PHP源码的扩展
- SimHei MAC字体资源
- 2024全国行政区域编码
- 数据源-数据可视化(七):Pandas香港酒店数据高级分析,涉及相关系数,协方差,数据离散化,透视表等精美可视化展示
- linux常用命令大全.doc
- 格拉斯哥大学空缺职位申请详细介绍Applicant Guide.pdf
- mmexport1702953347189.mp4
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈