package ymh;
import java.io.*;
import java.util.*;
/** A single session recognised in the log, held as the set of pages it contains. */
class UBSession {
    /** Pages that make up this session; duplicates collapse because it is a set. */
    HashSet pagesSet;

    /** Creates a session with an empty page set. */
    UBSession() {
        pagesSet = new HashSet();
    }
}
/**
 * Groups the sessions found in a log file by the similarity of their page sets.
 *
 * <p>{@link #main} reads records from {@code NASASession3.log}, writes the pages of
 * every recognised session to {@code NASAAnalysis.log}, and writes the pairwise
 * similarity matrix followed by per-class statistics to {@code NASAResult1.log}.
 */
public class UB {
    /** Sessions recognised in the log, in the order in which they were closed. */
    static ArrayList sessionList = new ArrayList();

    /** Number of whitespace-separated fields a log line must have to be used. */
    private static final int FIELDS_PER_RECORD = 3;

    /** Value of the second field at or above which a record closes the current session. */
    private static final int SESSION_BOUNDARY = 1800;

    /** Similarity a session must exceed to be counted into another session's class. */
    private static final float SIMILARITY_THRESHOLD = 1.0f;

    /**
     * Runs the whole pipeline: session recognition, similarity matrix, clustering.
     *
     * @param args ignored
     * @throws IOException if a log file cannot be opened or read
     * @throws NumberFormatException if the second field of a three-field line is not an integer
     */
    public static void main(String[] args) throws IOException {
        BufferedReader in = null;
        PrintWriter out = null;
        PrintWriter outAnalysis = null;
        try {
            // The input is opened first so that a missing input file does not
            // truncate the result files.
            in = new BufferedReader(new FileReader("NASASession3.log"));
            out = new PrintWriter(new BufferedWriter(new FileWriter("NASAResult1.log")));
            outAnalysis = new PrintWriter(new BufferedWriter(new FileWriter("NASAAnalysis.log")));

            readSessions(in, outAnalysis);
            System.out.println("sessionList.size():" + sessionList.size());

            float[][] simMatrix = buildSimilarityMatrix(out, outAnalysis);
            clusterSessions(simMatrix, out);
        } finally {
            // Close on every path so buffered output is flushed even after a failure.
            // The writers go first: PrintWriter.close() does not throw, whereas
            // closing the reader may.
            if (outAnalysis != null) {
                outAnalysis.close();
            }
            if (out != null) {
                out.close();
            }
            if (in != null) {
                in.close();
            }
        }
        System.out.println("The program is over.");
    }

    /**
     * Reads log records and appends every completed session to {@link #sessionList}.
     *
     * <p>A usable line has exactly three whitespace-separated fields; the second is
     * parsed as an integer and the third is the page. Every page is added to the
     * current session. A record whose second field is at least
     * {@link #SESSION_BOUNDARY} also closes that session: its pages are dumped to
     * {@code outAnalysis}, it is stored, and a fresh session is started.
     *
     * <p>NOTE(review): pages collected after the last closing record are never stored;
     * confirm that the input always ends with a closing record.
     */
    private static void readSessions(BufferedReader in, PrintWriter outAnalysis)
            throws IOException {
        UBSession current = new UBSession();
        String line;
        while ((line = in.readLine()) != null) {
            StringTokenizer fields = new StringTokenizer(line);
            // Count up front instead of consuming in a loop: a loop that stops
            // calling nextToken() after the third field never sees
            // hasMoreTokens() turn false on longer lines.
            if (fields.countTokens() != FIELDS_PER_RECORD) {
                continue;
            }
            fields.nextToken(); // the first field is not used
            int accessTime = Integer.parseInt(fields.nextToken());
            String url = fields.nextToken();

            // The closing record's own page still belongs to the session it closes.
            current.pagesSet.add(url);
            if (accessTime >= SESSION_BOUNDARY) {
                for (Iterator it = current.pagesSet.iterator(); it.hasNext();) {
                    outAnalysis.println(it.next());
                }
                outAnalysis.println("\n");
                sessionList.add(current);
                current = new UBSession();
            }
        }
    }

    /**
     * Computes the similarity of every ordered pair of stored sessions and writes the
     * matrix to {@code out}, one tab-separated row per session.
     *
     * <p>A diagonal entry different from 1 is reported on standard output.
     *
     * @return the square matrix, indexed like {@link #sessionList}
     */
    private static float[][] buildSimilarityMatrix(PrintWriter out, PrintWriter outAnalysis) {
        int n = sessionList.size();
        float[][] simMatrix = new float[n][n];
        for (int i = 0; i < n; i++) {
            UBSession rowSession = (UBSession) sessionList.get(i);
            for (int j = 0; j < n; j++) {
                float simCoef = computeSessionSim(rowSession,
                        (UBSession) sessionList.get(j), outAnalysis);
                simMatrix[i][j] = simCoef;
                if (i == j && simCoef != 1.0f) {
                    System.out.println("i = " + i + "\tj = " + j + "\t" + simMatrix[i][j]);
                }
                out.print(simCoef + "\t");
            }
            out.print("\n");
        }
        return simMatrix;
    }

    /**
     * Forms classes from the similarity matrix and writes their statistics to {@code out}.
     *
     * <p>Each session not yet absorbed by an earlier class seeds a new class. Every
     * session whose similarity to the seed exceeds {@link #SIMILARITY_THRESHOLD} is
     * counted into it and marked as absorbed.
     *
     * <p>NOTE(review): computeSessionSim appears to be bounded by 1, so with a
     * threshold of 1.0 and a strict comparison no session seems able to join
     * another's class; confirm whether {@code >=} or a lower threshold was intended.
     * The count also starts at 1 for the seed while the scan does not skip the seed's
     * own column, so a lower threshold would presumably count the seed twice.
     */
    private static void clusterSessions(float[][] simMatrix, PrintWriter out) {
        int n = simMatrix.length;
        boolean[] absorbed = new boolean[n];
        int classNum = 0;
        int totalSession = 0;
        for (int i = 0; i < n; i++) {
            if (absorbed[i]) {
                continue;
            }
            classNum++;
            out.println("\n第" + classNum + "个类:\n");
            int sessNum = 1; // the seed session itself
            float totalSim = 0.0f;
            for (int j = 0; j < n; j++) {
                if (simMatrix[i][j] > SIMILARITY_THRESHOLD) {
                    sessNum++;
                    totalSim += simMatrix[i][j];
                    absorbed[j] = true;
                }
            }
            totalSession += sessNum;
            out.println("sessNum = " + sessNum);
            out.println("meanSim = " + totalSim / sessNum);
        }
        out.println("\ntotalSession = " + totalSession);
    }

    /**
     * Computes the similarity between two sessions: the number of pages they share
     * divided by the square root of the product of their page-set sizes.
     *
     * @param s1 first session
     * @param s2 second session
     * @param outAnalysis not used; kept so existing callers keep compiling
     * @return the similarity, or 0 when either session has no pages
     */
    static float computeSessionSim(UBSession s1, UBSession s2,
            PrintWriter outAnalysis) {
        int size1 = s1.pagesSet.size();
        int size2 = s2.pagesSet.size();
        // Without this guard an empty session would produce 0/0 = NaN.
        if (size1 == 0 || size2 == 0) {
            return 0.0f;
        }
        int commonPages = 0;
        for (Iterator it = s1.pagesSet.iterator(); it.hasNext();) {
            if (s2.pagesSet.contains(it.next())) {
                commonPages++;
            }
        }
        // Multiply as double: an int product can overflow for large page sets.
        return (float) commonPages / (float) Math.sqrt((double) size1 * size2);
    }
}