/**
*
*/
package com.kang.util;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Reader;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.SequenceFile.Writer.Option;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobStatus;
import org.apache.hadoop.util.ReflectionUtils;
import com.kang.fastcluster.keytype.DoubleArrStrWritable;
import com.kang.fastcluster.keytype.DoublePairWritable;
import com.kang.fastcluster.keytype.IntDoublePairWritable;
import com.kang.filter.keytype.DoubleArrIntWritable;
import com.kang.model.CurrentJobInfo;
import com.kang.model.UserData;
import com.kang.model.UserGroup;
/**
* Hadoop 工具类
*
*/
public class HUtils {
public static final double VERYSMALL = 0.00000000000000000000000000001;
// pre filter
// HDFS path where the original user.xml source file is stored
public static final String SOURCEFILE = "/user/root/_source/source_users.xml";
// root HDFS directory for the filtering stage
public static final String FILTER = "/user/root/_filter";
public static final String FILTER_DEDUPLICATE = FILTER + "/"
+ "deduplicate";
public static final String FILTER_GETATTRIBUTES = FILTER + "/"
+ "getattributes";// attribute extraction
public static final String FILTER_GETMAXMIN = FILTER + "/" + "getmaxmin";// per-column max/min values
public static final String FILTER_NORMALIZATION = FILTER + "/"
+ "normalization";// normalization
public static final String FILTER_FINDINITDC = FILTER + "/" + "findinitdc";// search for the initial dc threshold
public static final String FILTER_PREPAREVECTORS = FILTER + "/"
+ "preparevectors";// prepare distance vectors
public static final int FILTER_PREPAREVECTORS_FILES = 4;// number of files produced when exporting from the DB to HDFS
public static final String FILTER_CALDISTANCE = FILTER + "/"
+ "caldistance";// pairwise distances between vectors
public static final String DEDUPLICATE_LOCAL = "WEB-INF/classes/deduplicate_users.xml";
public static final String LOCALCENTERFILE="WEB-INF/classes/centervector.dat";// local cluster-center file
public static final String MAP_COUNTER = "MAP_COUNTER";
public static final String REDUCE_COUNTER = "REDUCE_COUNTER";
public static final String REDUCE_COUNTER2 = "REDUCE_COUNTER2";
public static final String DOWNLOAD_EXTENSION = ".dat";// file extension for downloads
public static double DELTA_DC = 0.0;// dc threshold
public static long INPUT_RECORDS = 0L;// total record count; assigned after the FindInitDCJob task completes
// clustering classification counters
public static long CLUSTERED=-1;
public static long UNCLUSTERED=-1;
// fast cluster
public static final String LOCALDENSITYOUTPUT = "/user/root/localdensity";
public static final String DELTADISTANCEOUTPUT = "/user/root/deltadistance";
public static final String DELTADISTANCEBIN = "/user/root/deltadistance.bin";// path storing the id of the vector with the maximum local density
public static final String SORTOUTPUT = "/user/root/sort";
public static final String FIRSTCENTERPATH = "/user/root/_center/iter_0/clustered/part-m-00000";
public static final String FIRSTUNCLUSTEREDPATH = "/user/root/_center/iter_0/unclustered";
public static final String CENTERPATH = "/user/root/_center";
public static final String CENTERPATHPREFIX = "/user/root/_center/iter_";
private static Configuration conf = null;
private static FileSystem fs = null;
public static boolean flag = true; // get configuration from db or file
// true: db, false: file
public static int JOBNUM = 1; // number of jobs in one job group
// start-time threshold for the first job: only jobs started after this time are the ones to monitor
private static long jobStartTime = 0L;// obtained via System.currentTimeMillis()
private static JobClient jobClient = null;
/**
 * Lazily builds and caches the Hadoop {@link Configuration}, resolving each
 * property from the database or a properties file via {@code Utils.getKey}.
 *
 * @return the shared, lazily-initialized Configuration
 */
public static Configuration getConf() {
	if (conf == null) {
		conf = new Configuration();
		// Cross-platform submission (e.g. submitting from Windows to a Linux cluster).
		String crossPlatform = Utils.getKey(
				"mapreduce.app-submission.cross-platform", flag);
		conf.setBoolean("mapreduce.app-submission.cross-platform",
				"true".equals(crossPlatform));
		// Cluster endpoints: namenode, execution framework (yarn),
		// resourcemanager, scheduler, and job history server.
		String[] clusterKeys = { "fs.defaultFS", "mapreduce.framework.name",
				"yarn.resourcemanager.address",
				"yarn.resourcemanager.scheduler.address",
				"mapreduce.jobhistory.address" };
		for (String key : clusterKeys) {
			conf.set(key, Utils.getKey(key, flag));
		}
	}
	return conf;
}
/**
 * Lazily creates and caches the HDFS {@link FileSystem} handle.
 *
 * @return the shared FileSystem instance, never {@code null}
 * @throws IllegalStateException if the FileSystem cannot be obtained
 */
public static FileSystem getFs() {
	if (fs == null) {
		try {
			fs = FileSystem.get(getConf());
		} catch (IOException e) {
			// Previously the exception was only printed and null was returned,
			// which produced obscure NPEs at the call sites; fail fast instead
			// and preserve the original cause.
			throw new IllegalStateException("Cannot obtain HDFS FileSystem", e);
		}
	}
	return fs;
}
/**
 * Lists an HDFS directory (optionally recursing into subdirectories) and
 * returns the file paths joined by HTML line breaks for display in the UI.
 *
 * @param input HDFS path of the directory to list
 * @param recursive whether to descend into subdirectories
 * @return all file paths, each followed by {@code <br>}
 * @throws IOException if the listing fails
 */
public static String getHdfsFiles(String input, boolean recursive)
		throws IOException {
	RemoteIterator<LocatedFileStatus> files = getFs().listFiles(
			new Path(input), recursive);
	// StringBuilder: this is method-local, so the synchronized StringBuffer
	// used before was pure overhead.
	StringBuilder buff = new StringBuilder();
	while (files.hasNext()) {
		buff.append(files.next().getPath().toString()).append("<br>");
	}
	return buff.toString();
}
/**
 * Returns the jobs to monitor, selected by start time: a job's start time (as
 * reported by the cluster) is comparable to {@code System.currentTimeMillis()},
 * so only jobs started after {@link #jobStartTime} belong to the current
 * monitored group. The result is sorted via {@link Collections#sort}.
 *
 * @return sorted list of jobs started after {@code jobStartTime}
 * @throws IOException if the cluster cannot be queried
 */
public static List<CurrentJobInfo> getJobs() throws IOException {
	// Hoisted out of the loop: the previous code called getJobClient() on
	// every iteration; also removed a redundant clear() on a fresh list.
	JobClient client = getJobClient();
	JobStatus[] jss = client.getAllJobs();// every job, failed or successful
	List<CurrentJobInfo> jsList = new ArrayList<CurrentJobInfo>();
	for (JobStatus js : jss) {
		if (js.getStartTime() > jobStartTime) {
			jsList.add(new CurrentJobInfo(client.getJob(js.getJobID()),
					js.getStartTime(), js.getRunState()));
		}
	}
	Collections.sort(jsList);
	return jsList;
}
/**
 * Prints a one-line progress summary of the given job status to stdout,
 * prefixed with the current wall-clock time.
 *
 * @param js the job status to report
 */
public static void printJobStatus(JobStatus js) {
	StringBuilder line = new StringBuilder();
	line.append(new java.util.Date()).append(":jobId:")
			.append(js.getJobID().toString()).append(",map:")
			.append(js.getMapProgress()).append(",reduce:")
			.append(js.getReduceProgress()).append(",finish:")
			.append(js.getRunState());
	System.out.println(line.toString());
}
/**
 * Lazily creates and caches the {@link JobClient} used to query the cluster.
 *
 * @return the shared JobClient, never {@code null}
 * @throws IllegalStateException if the JobClient cannot be created
 */
public static JobClient getJobClient() {
	if (jobClient == null) {
		try {
			jobClient = new JobClient(getConf());
		} catch (IOException e) {
			// Previously the exception was only printed and null was returned,
			// causing obscure NPEs at the call sites; fail fast with the cause.
			throw new IllegalStateException("Cannot create JobClient", e);
		}
	}
	return jobClient;
}
/**
 * Replaces the cached {@link JobClient} (e.g. to reset it after a
 * configuration change).
 *
 * @param jobClient
 * the jobClient to set
 */
public static void setJobClient(JobClient jobClient) {
HUtils.jobClient = jobClient;
}
/**
 * @return the start-time threshold (epoch millis) used by {@code getJobs()}
 * to select which jobs belong to the monitored group
 */
public static long getJobStartTime() {
return jobStartTime;
}
/**
 * Sets the start-time threshold (epoch millis, typically
 * {@code System.currentTimeMillis()}) used by {@code getJobs()} to select
 * the monitored job group.
 *
 * @param jobStartTime new threshold in epoch milliseconds
 */
public static void setJobStartTime(long jobStartTime) {
HUtils.jobStartTime = jobStartTime;
}
/**
* 判断一组MR任务是否完成
*
* @param currentJobInfo
* @return
*/
public static String hasFinished(CurrentJobInfo currentJobInfo) {
if (currentJo
没有合适的资源?快使用搜索试试~ 我知道了~
基于Hadoop的好友推荐系统的设计与实现+部署文档+全部资料 高分项目.zip
共2000个文件
png:1260个
css:403个
jar:88个
1.该资源内容由用户上传,如若侵权请联系客服进行举报
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
版权申诉
0 下载量 47 浏览量
2024-05-13
18:10:00
上传
评论 1
收藏 79.5MB ZIP 举报
温馨提示
【资源说明】 基于Hadoop的好友推荐系统的设计与实现+部署文档+全部资料 高分项目.zip基于Hadoop的好友推荐系统的设计与实现+部署文档+全部资料 高分项目.zip 【备注】 1、该项目是个人高分项目源码,已获导师指导认可通过,答辩评审分达到95分 2、该资源内项目代码都经过测试运行成功,功能ok的情况下才上传的,请放心下载使用! 3、本项目适合计算机相关专业(人工智能、通信工程、自动化、电子信息、物联网等)的在校学生、老师或者企业员工下载使用,也可作为毕业设计、课程设计、作业、项目初期立项演示等,当然也适合小白学习进阶。 4、如果基础还行,可以在此代码基础上进行修改,以实现其他功能,也可直接用于毕设、课设、作业等。 欢迎下载,沟通交流,互相学习,共同进步!
资源推荐
资源详情
资源评论
收起资源包目录
基于Hadoop的好友推荐系统的设计与实现+部署文档+全部资料 高分项目.zip (2000个子文件)
HUtils.class 31KB
CloudAction.class 12KB
DBService.class 12KB
Utils.class 11KB
DrawPic.class 9KB
BaseDAOImpl.class 9KB
ClusterDataMapper.class 8KB
DeltaDistanceMapper.class 8KB
CalDistanceMapper.class 6KB
FindInitDCMapper.class 6KB
UserData.class 5KB
DeltaDistanceReducer.class 5KB
RunCluster2.class 4KB
CalDistanceJob.class 4KB
DBAction.class 4KB
FindInitDCJob.class 4KB
ClusterDataJob.class 4KB
LocalDensityJob.class 3KB
DoubleArrIntWritable.class 3KB
CurrentJobInfo.class 3KB
DoubleArrStrWritable.class 3KB
FindInitReducer.class 3KB
DeltaDistanceJob.class 3KB
LocalDensityMapper.class 3KB
RunCluster1.class 3KB
SortJob.class 3KB
DeduplicateReducer.class 3KB
TestService.class 3KB
SortJob$SortReducer.class 3KB
DoubleArrWritable.class 3KB
IntDoublePairWritable.class 3KB
LocalDensityReducer.class 3KB
CustomDoubleWritable.class 3KB
DeduplicateJob.class 3KB
CalDistanceReducer.class 3KB
IntPairWritable.class 2KB
DoublePairWritable.class 2KB
SortJob$SortMapper.class 2KB
BaseDAO.class 2KB
HConstants.class 2KB
DeduplicateMapper.class 2KB
LoginUser.class 2KB
UserGroup.class 2KB
StringPairWritable.class 2KB
LoginUserAction.class 2KB
ApplicationListenerImpl.class 2KB
IDistanceDensityMul.class 2KB
TestAction.class 2KB
CalDistance.class 1KB
Deduplicate.class 1KB
FilterCounter.class 1KB
ClusterCounter.class 1KB
SpringUtil.class 999B
CustomDoubleWritable$Comparator.class 857B
ObjectInterface.class 285B
.classpath 1KB
org.eclipse.wst.common.component 682B
org.eclipse.wst.jsdt.ui.superType.container 49B
icon.css 111KB
easyui.css 51KB
easyui.css 51KB
easyui.css 50KB
easyui.css 48KB
easyui.css 48KB
easyui.css 48KB
easyui.css 47KB
easyui.css 47KB
easyui.css 47KB
easyui.css 47KB
easyui.css 47KB
easyui.css 47KB
easyui.css 47KB
easyui.css 44KB
tabs.css 8KB
tabs.css 8KB
tabs.css 8KB
tabs.css 8KB
tabs.css 8KB
tabs.css 8KB
tabs.css 8KB
tabs.css 6KB
tabs.css 6KB
tabs.css 6KB
tabs.css 6KB
tabs.css 6KB
tabs.css 6KB
tabs.css 6KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
共 2000 条
- 1
- 2
- 3
- 4
- 5
- 6
- 20
资源评论
不走小道
- 粉丝: 3237
- 资源: 5113
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功