package com.vstartek.pca.kmeans;
/*
* Implements the k-means algorithm
*
* Manas Somaiya
* Computer and Information Science and Engineering
* University of Florida
*
* Created: October 29, 2003
* Last updated: October 30, 2003
*
*/
import java.io.*;
import java.util.*;
/**
* Implements the k-means algorithm
* @author Manas Somaiya mhs@cise.ufl.edu
*/
public class kMeans {
/** Number of clusters to form; also the length of the {@code clusters} array. */
private int k;
/** The k clusters; the array is allocated in the constructors and filled in by runKMeans(). */
private cluster[] clusters;
/** Number of assignment passes performed so far; incremented once per pass in runKMeans(). */
private int nIterations;
/** Data points to cluster; a raw Vector whose elements are cast to kMeansPoint when read. */
private Vector kMeansPoints;
/** Name of the input file opened by readData(). */
private String inputFileName;
/**
 * Creates a k-means instance whose data points are to be loaded from a file.
 *
 * The point collection starts out empty and the iteration counter at zero;
 * the cluster array is sized for k entries but not yet populated.
 *
 * @param k number of clusters
 * @param inputFileName name of the file containing input data
 */
public kMeans(int k, String inputFileName) {
    this.inputFileName = inputFileName;
    this.k = k;
    this.nIterations = 0;
    this.kMeansPoints = new Vector();
    this.clusters = new cluster[k];
} // end of kMeans()
/**
 * Creates a k-means instance over an in-memory collection of data points.
 *
 * The given list is copied into an internal Vector, so later changes to the
 * caller's list are not seen by this instance. No input file name is set by
 * this constructor, so readData() has no file to open unless the name is
 * assigned elsewhere.
 *
 * @param k number of clusters
 * @param kMeansPoints List containing objects of type kMeansPoint
 * @throws NullPointerException if {@code kMeansPoints} is null
 */
public kMeans(int k, List kMeansPoints) {
    this.k = k;
    // There is no file-name parameter here; the original self-assignment
    // (this.inputFileName = inputFileName) was a no-op, so state the value explicitly.
    this.inputFileName = null;
    this.clusters = new cluster[this.k];
    this.nIterations = 0;
    this.kMeansPoints = new Vector(kMeansPoints);
} // end of kMeans()
/**
 * Reads the input data from the file and stores the data points in the vector.
 *
 * Each line is split on ':' and every field is parsed as a double. A line with
 * exactly three fields becomes a point built with the (x, y, z) constructor;
 * any other line with at least two fields becomes a point built with the list
 * constructor. Lines with fewer than two fields are ignored. Every point read
 * is initially assigned to cluster 0 and added exactly once.
 *
 * @throws IOException if the file cannot be opened or read
 * @throws NumberFormatException if a field cannot be parsed as a double
 */
public void readData() throws IOException {
    BufferedReader in = new BufferedReader(new FileReader(this.inputFileName));
    try {
        String line;
        while ((line = in.readLine()) != null) {
            String[] fields = line.split(":");
            kMeansPoint dp;
            if (fields.length == 3) {
                // 3-dimensional point
                dp = new kMeansPoint(Double.parseDouble(fields[0]),
                        Double.parseDouble(fields[1]),
                        Double.parseDouble(fields[2]));
            } else if (fields.length >= 2) {
                // "else if": previously a three-field line also fell into this
                // branch, so every 3-dimensional point was stored twice.
                List<Double> coordinates = new ArrayList<Double>();
                for (String field : fields) {
                    coordinates.add(Double.parseDouble(field));
                }
                dp = new kMeansPoint(coordinates);
            } else {
                // Not enough fields to form a point (e.g. a blank line).
                continue;
            }
            dp.assignToCluster(0);
            this.kMeansPoints.add(dp);
        }
    } finally {
        // Close the reader even when reading or parsing fails.
        in.close();
    }
} // end of readData()
/**
 * Executes the k-means procedure on the stored data points.
 *
 * Every cluster is first given a randomly chosen data point as its mean.
 * Then all points are reassigned and the means recomputed, repeating for as
 * long as updateMeans() reports that a mean changed.
 */
public void runKMeans() {
    // Seed each cluster with a randomly picked data point as its initial mean
    for (int c = 0; c < k; c++) {
        this.clusters[c] = new cluster(c);
        int pick = (int) (Math.random() * this.kMeansPoints.size());
        this.clusters[c].setMean((kMeansPoint) this.kMeansPoints.get(pick));
    }
    boolean meansMoved;
    do {
        // One pass: place every point into a cluster
        for (Object point : this.kMeansPoints) {
            this.assignToCluster((kMeansPoint) point);
        }
        this.nIterations++;
        // Keep going only while the centroids are still changing
        meansMoved = this.updateMeans();
    } while (meansMoved);
} // end of runKMeans()
/**
 * Appends a newline followed by {@code content} to the file at {@code path},
 * creating the file first if it does not exist.
 *
 * Progress messages are printed to standard output. This is a best-effort
 * operation: any exception is caught and its stack trace printed, and nothing
 * is propagated to the caller. The text is encoded with the platform default
 * charset.
 *
 * @param path path of the file to append to
 * @param content text to append after a leading newline
 */
public static void write(String path, String content) {
    try {
        File f = new File(path);
        if (f.exists()) {
            System.out.println("文件存在");
        } else {
            System.out.println("文件不存在,正在创建....");
            if (f.createNewFile()) {
                System.out.println("文件创建成功!");
            } else {
                System.out.println("文件创建失败!");
            }
        }
        byte[] b = ("\n" + content).getBytes();
        // Second argument 'true' opens the file in append mode.
        OutputStream outPut = new FileOutputStream(path, true);
        try {
            outPut.write(b, 0, b.length);
        } finally {
            // Release the file handle even if the write fails.
            outPut.close();
        }
    } catch (Exception e) {
        // Deliberately broad: callers rely on this method never throwing.
        e.printStackTrace();
    }
}
/**
 * Assigns a data point to one of the k clusters based on its distance from the means of the clusters.
 *
 * The search starts from the point's current cluster and only moves to another
 * cluster whose mean is strictly closer, so ties keep the current assignment.
 * Points whose coordinate list has exactly three entries are measured with
 * kMeansPoint.distance; all others with kMeansPoint.comPointsDis.
 *
 * @param dp data point to be assigned
 */
public void assignToCluster(kMeansPoint dp) {
    int bestCluster = dp.getClusterNumber();
    final boolean threeDimensional = dp.getListData().size() == 3;
    double minDistance = threeDimensional
            ? kMeansPoint.distance(dp, this.clusters[bestCluster].getMean())
            : kMeansPoint.comPointsDis(dp, this.clusters[bestCluster].getMean());
    for (int i = 0; i < this.k; i++) {
        // Compute each candidate distance once instead of twice.
        double candidate = threeDimensional
                ? kMeansPoint.distance(dp, this.clusters[i].getMean())
                : kMeansPoint.comPointsDis(dp, this.clusters[i].getMean());
        if (candidate < minDistance) {
            minDistance = candidate;
            bestCluster = i;
        }
    }
    dp.assignToCluster(bestCluster);
} // end of assignToCluster
/**
 * Updates the means of all k clusters, and returns if they have changed or not.
 *
 * The new mean of a cluster is the coordinate-wise average of the points
 * currently assigned to it; a cluster with no points keeps its previous mean.
 * The dimensionality is taken from the first data point: when it has exactly
 * three coordinates the x/y/z accessors and kMeansPoint.distance are used,
 * otherwise the coordinate list and kMeansPoint.comPointsDis. A mean counts as
 * changed when its distance to the previous mean is not exactly zero.
 *
 * @return have the updated means of the clusters changed or not
 */
private boolean updateMeans() {
    boolean changed = false;
    // Loop-invariant: look the dimensionality up once rather than in every loop test.
    final int dimensions = ((kMeansPoint) this.kMeansPoints.get(0)).getListData().size();
    // Java zero-initialises numeric arrays, so no explicit reset loop is needed.
    final int[] size = new int[this.k];
    final kMeansPoint[] pastMeans = new kMeansPoint[this.k];
    for (int c = 0; c < this.k; c++) {
        pastMeans[c] = this.clusters[c].getMean();
    }
    if (dimensions == 3) {
        final double[] x = new double[this.k];
        final double[] y = new double[this.k];
        final double[] z = new double[this.k];
        // Accumulate per-cluster coordinate sums and member counts.
        for (Object element : this.kMeansPoints) {
            kMeansPoint dp = (kMeansPoint) element;
            int c = dp.getClusterNumber();
            x[c] += dp.getX();
            y[c] += dp.getY();
            z[c] += dp.getZ();
            size[c]++;
        }
        for (int c = 0; c < this.k; c++) {
            if (size[c] == 0) {
                continue; // empty cluster: leave its mean untouched
            }
            kMeansPoint mean = new kMeansPoint(x[c] / size[c], y[c] / size[c], z[c] / size[c]);
            mean.assignToCluster(c);
            this.clusters[c].setMean(mean);
            if (kMeansPoint.distance(pastMeans[c], this.clusters[c].getMean()) != 0) {
                changed = true;
            }
        }
    } else {
        // sums[cluster][dimension]
        final double[][] sums = new double[this.k][dimensions];
        for (Object element : this.kMeansPoints) {
            kMeansPoint dp = (kMeansPoint) element;
            int c = dp.getClusterNumber();
            for (int d = 0; d < dimensions; d++) {
                sums[c][d] += dp.getListData().get(d);
            }
            size[c]++;
        }
        for (int c = 0; c < this.k; c++) {
            if (size[c] == 0) {
                continue; // empty cluster: leave its mean untouched
            }
            List<Double> averaged = new ArrayList<Double>(dimensions);
            for (int d = 0; d < dimensions; d++) {
                averaged.add(sums[c][d] / size[c]);
            }
            kMeansPoint mean = new kMeansPoint(averaged);
            mean.assignToCluster(c);
            this.clusters[c].setMean(mean);
            if (kMeansPoint.comPointsDis(pastMeans[c], this.clusters[c].getMean()) != 0) {
                changed = true;
            }
        }
    }
    return changed;
} // end of updateMeans()
/**
* Returns the value of k
*
* @
没有合适的资源?快使用搜索试试~ 我知道了~
K_means推荐算法的java实现
共3个文件
java:3个
3星 · 超过75%的资源 需积分: 32 31 下载量 80 浏览量
2011-03-12
14:57:25
上传
评论 2
收藏 5KB RAR 举报
温馨提示
用java实现了典型K_means推荐算法,包括3个class,并进行了测试
资源推荐
资源详情
资源评论
收起资源包目录
kmeans.rar (3个子文件)
kmeans
kMeansPoint.java 5KB
cluster.java 2KB
kMeans.java 13KB
共 3 条
- 1
资源评论
- miss_now2012-04-18一个说明文档都没用,哪值得了10分啊
- HensonHan2012-06-09对啊,没有什么说明文档
wuxn06
- 粉丝: 1
- 资源: 5
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功