package buju;
import java.util.ArrayList;
import java.util.List;
/**
* Canopy算法 借助canopy算法计算对应的Kmeans中的K值大小
* 其中对于计算K值来说, 我们这里将T2设置为平均距离
* 获取T1内圈的数据向量,k和簇的中心
*/
public class Canopy {
private List<Point> points = new ArrayList<Point>(); // 进行聚类的点
private List<List<Point>> clusters = new ArrayList<List<Point>>(); // 存储簇
private double T2 = -1; // 阈值
private double T1 = 0;
public Canopy(List<Point> points) {
for (Point point : points)
// 进行深拷贝
this.points.add(point);
}
//进行聚类,按照Canopy算法进行计算,将所有点进行聚类
public void cluster() {
//T2 = getAverageDistance(points);
T2=getAverageDistance(points);
T1=2*T2;
while (points.size() != 0) {
List<Point> cluster = new ArrayList<Point>();
Point basePoint = points.get(0); // 基准点
cluster.add(basePoint);
points.remove(0);
int index = 0;
while (index < points.size()) {
Point anotherPoint = points.get(index);
double distance = Math.sqrt((basePoint.x - anotherPoint.x)
* (basePoint.x - anotherPoint.x)
+ (basePoint.y - anotherPoint.y)
* (basePoint.y - anotherPoint.y));
if (distance <= T2) {
cluster.add(anotherPoint);
points.remove(index);
} else {
index++;
}
}
clusters.add(cluster);
}
// return clusters();
}
public void clusterT1() {
T1 = 2*getAverageDistance(points);
while (points.size() != 0) {
List<Point> cluster = new ArrayList<Point>();
Point basePoint = points.get(0); // 基准点
cluster.add(basePoint);
points.remove(0);
int index = 0;
while (index < points.size()) {
Point anotherPoint = points.get(index);
double distance = Math.sqrt((basePoint.x - anotherPoint.x)
* (basePoint.x - anotherPoint.x)
+ (basePoint.y - anotherPoint.y)
* (basePoint.y - anotherPoint.y));
if (distance <= T1) {
cluster.add(anotherPoint);
points.remove(index);
} else {
index++;
}
}
clusters.add(cluster);
}
}
//得到Cluster的数目
public int getClusterNumber() {
return clusters.size();
}
//获取Cluster对应的中心点(各点相加求平均)
public List<Point> getClusterCenterPoints() {
List<Point> centerPoints = new ArrayList<Point>();
for (List<Point> cluster : clusters) {
centerPoints.add(getCenterPoint(cluster));
}
return centerPoints;
}
//得到的中心点(各点相加求平均)
private double getAverageDistance(List<Point> points) {
double sum = 0;
int pointSize = points.size();
for (int i = 0; i < pointSize; i++) {
for (int j = 0; j < pointSize; j++) {
if (i == j)
continue;
Point pointA = points.get(i);
Point pointB = points.get(j);
sum += Math.sqrt((pointA.x - pointB.x) * (pointA.x - pointB.x)
+ (pointA.y - pointB.y) * (pointA.y - pointB.y));
}
}
int distanceNumber = (pointSize-1) * (pointSize-1);
double T2 = sum / distanceNumber / 2; // 平均距离的一半
return T2;
}
//得到的中心点(各点相加求平均)
private Point getCenterPoint(List<Point> points) {
double sumX = 0;
double sumY = 0;
for (Point point : points) {
sumX += point.x;
sumY += point.y;
}
int clusterSize = points.size();
Point centerPoint = new Point(sumX / clusterSize, sumY / clusterSize);
return centerPoint;
}
//获取阈值T2
public double getThreshold() {
return T2;
}
//测试点,进行操作
public static void main(String[] args) {
List<Point> points = new ArrayList<Point>();
points.add(new Point(0, 0));
points.add(new Point(0, 1));
points.add(new Point(1, 0));
points.add(new Point(5, 5));
points.add(new Point(5, 6));
points.add(new Point(6, 5));
points.add(new Point(10, 2));
points.add(new Point(10, 3));
points.add(new Point(11, 4));
points.add(new Point(11, 5));
points.add(new Point(20, 2));
points.add(new Point(11, 25));
points.add(new Point(51, 53));
points.add(new Point(41, 4));
points.add(new Point(8, 52));
points.add(new Point(41, 32));
points.add(new Point(14, 3));
points.add(new Point(16, 44));
points.add(new Point(41,4));
points.add(new Point(44, 4));
points.add(new Point(43, 27));
points.add(new Point(86, 43));
points.add(new Point(51, 56));
points.add(new Point(12, 63));
points.add(new Point(61, 86));
points.add(new Point(54, 87));
points.add(new Point(61, 3));
points.add(new Point(43, 93));
points.add(new Point(54, 67));
points.add(new Point(76, 3));
points.add(new Point(168, 7));
points.add(new Point(41, 88));
points.add(new Point(33, 45));
points.add(new Point(64, 54));
points.add(new Point(71, 76));
points.add(new Point(87, 8));
points.add(new Point(41, 54));
points.add(new Point(33, 55));
points.add(new Point(45, 11));
points.add(new Point(14, 56));
points.add(new Point(91, 76));
Canopy canopy = new Canopy(points);
canopy.cluster();
//获取canopy数目
int clusterNumber = canopy.getClusterNumber();
System.out.println("k="+clusterNumber);
//获取canopy中T2的值
System.out.println("T2="+canopy.getThreshold());
// System.out.println("("+canopy.getCenterPoint(points).getX()+" "+canopy.getCenterPoint(points).getY()+")");
for(int i=0;i<canopy.clusters.size();i++){
List<Point> cluster=canopy.clusters.get(i);
System.out.print("第"+(i+1)+"簇中心点:");
System.out.println("("+canopy.getCenterPoint(cluster).getX()+","+canopy.getCenterPoint(cluster).getY()+")");
System.out.print("第"+(i+1)+"簇内圈包括的点有:");
for(int j=0;j<cluster.size();j++){
Point point=cluster.get(j);
double x=point.getX();
double y=point.getY();
System.out.print("("+x+" "+y+")");
}
System.out.println("");
}
}
}