/**
*
*/
package com.kang.util;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Reader;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.SequenceFile.Writer.Option;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobStatus;
import org.apache.hadoop.util.ReflectionUtils;
import com.kang.fastcluster.keytype.DoubleArrStrWritable;
import com.kang.fastcluster.keytype.DoublePairWritable;
import com.kang.fastcluster.keytype.IntDoublePairWritable;
import com.kang.filter.keytype.DoubleArrIntWritable;
import com.kang.model.CurrentJobInfo;
import com.kang.model.UserData;
import com.kang.model.UserGroup;
/**
* Hadoop 工具类
*
*/
public class HUtils {
public static final double VERYSMALL = 0.00000000000000000000000000001;
// pre filter
// HDFS path of the raw user.xml source file
public static final String SOURCEFILE = "/user/root/_source/source_users.xml";
// root HDFS folder for the filter-phase outputs
public static final String FILTER = "/user/root/_filter";
public static final String FILTER_DEDUPLICATE = FILTER + "/"
+ "deduplicate";
public static final String FILTER_GETATTRIBUTES = FILTER + "/"
+ "getattributes";// attribute extraction output
public static final String FILTER_GETMAXMIN = FILTER + "/" + "getmaxmin";// per-column max/min output
public static final String FILTER_NORMALIZATION = FILTER + "/"
+ "normalization";// normalization output
public static final String FILTER_FINDINITDC = FILTER + "/" + "findinitdc";// output of the job that finds the dc threshold
public static final String FILTER_PREPAREVECTORS = FILTER + "/"
+ "preparevectors";// prepared distance vectors
public static final int FILTER_PREPAREVECTORS_FILES = 4;// number of files produced when loading DB data onto HDFS
public static final String FILTER_CALDISTANCE = FILTER + "/"
+ "caldistance";// pairwise vector distance output
public static final String DEDUPLICATE_LOCAL = "WEB-INF/classes/deduplicate_users.xml";
public static final String LOCALCENTERFILE="WEB-INF/classes/centervector.dat";// local cluster-center file
public static final String MAP_COUNTER = "MAP_COUNTER";
public static final String REDUCE_COUNTER = "REDUCE_COUNTER";
public static final String REDUCE_COUNTER2 = "REDUCE_COUNTER2";
public static final String DOWNLOAD_EXTENSION = ".dat";// filename extension used for downloaded files
public static double DELTA_DC = 0.0;// DC threshold
public static long INPUT_RECORDS = 0L;// total record count; assigned after the FindInitDCJob task runs
// cluster classification counters
public static long CLUSTERED=-1;
public static long UNCLUSTERED=-1;
// fast cluster
public static final String LOCALDENSITYOUTPUT = "/user/root/localdensity";
public static final String DELTADISTANCEOUTPUT = "/user/root/deltadistance";
public static final String DELTADISTANCEBIN = "/user/root/deltadistance.bin";// path storing the id of the vector with the highest local density
public static final String SORTOUTPUT = "/user/root/sort";
public static final String FIRSTCENTERPATH = "/user/root/_center/iter_0/clustered/part-m-00000";
public static final String FIRSTUNCLUSTEREDPATH = "/user/root/_center/iter_0/unclustered";
public static final String CENTERPATH = "/user/root/_center";
public static final String CENTERPATHPREFIX = "/user/root/_center/iter_";
// lazily initialized shared Configuration (see getConf())
private static Configuration conf = null;
// lazily initialized shared FileSystem (see getFs())
private static FileSystem fs = null;
public static boolean flag = true; // get configuration from db or file
// ,true : db,false:file
public static int JOBNUM = 1; // number of jobs in one job group
// start-time threshold for the first job; only jobs started after this are the ones being monitored
private static long jobStartTime = 0L;// obtained via System.currentTimeMillis()
private static JobClient jobClient = null;
/**
 * Returns the shared Hadoop {@link Configuration}, building it on first use.
 * Values are read via {@code Utils.getKey(key, flag)} — from the database when
 * {@code flag} is true, from a file otherwise.
 *
 * @return the lazily initialized, process-wide configuration
 */
public static Configuration getConf() {
    if (conf != null) {
        return conf;
    }
    Configuration c = new Configuration();
    // cross-platform job submission: stored value is a string, so compare against "true"
    String crossPlatform = Utils.getKey(
            "mapreduce.app-submission.cross-platform", flag);
    c.setBoolean("mapreduce.app-submission.cross-platform",
            "true".equals(crossPlatform));
    // namenode address
    c.set("fs.defaultFS", Utils.getKey("fs.defaultFS", flag));
    // use the YARN framework
    c.set("mapreduce.framework.name",
            Utils.getKey("mapreduce.framework.name", flag));
    // resourcemanager address
    c.set("yarn.resourcemanager.address",
            Utils.getKey("yarn.resourcemanager.address", flag));
    // resource scheduler address
    c.set("yarn.resourcemanager.scheduler.address",
            Utils.getKey("yarn.resourcemanager.scheduler.address", flag));
    c.set("mapreduce.jobhistory.address",
            Utils.getKey("mapreduce.jobhistory.address", flag));
    conf = c;
    return conf;
}
/**
 * Returns the shared HDFS {@link FileSystem}, creating it on first use from
 * {@link #getConf()}.
 * <p>
 * NOTE(review): on an {@link IOException} the stack trace is printed and
 * {@code null} is returned (original behavior, preserved) — callers must be
 * prepared for a {@code null} result.
 *
 * @return the lazily initialized file system, or {@code null} if creation failed
 */
public static FileSystem getFs() {
    if (fs != null) {
        return fs;
    }
    try {
        fs = FileSystem.get(getConf());
    } catch (IOException e) {
        e.printStackTrace();
    }
    return fs;
}
/**
* 获取hdfs文件目录及其子文件夹信息
*
* @param input
* @param recursive
* @return
* @throws IOException
*/
/**
 * Lists the files under an HDFS path and returns their full paths joined
 * with {@code "<br>"} separators (intended for direct display in a web page).
 *
 * @param input     HDFS directory to list
 * @param recursive whether to descend into subdirectories
 * @return the listed file paths, each followed by {@code "<br>"}
 * @throws IOException if the HDFS listing fails
 */
public static String getHdfsFiles(String input, boolean recursive)
        throws IOException {
    RemoteIterator<LocatedFileStatus> files = getFs().listFiles(
            new Path(input), recursive);
    // StringBuilder instead of StringBuffer: no shared-state synchronization needed here
    StringBuilder buff = new StringBuilder();
    while (files.hasNext()) {
        buff.append(files.next().getPath().toString()).append("<br>");
    }
    return buff.toString();
}
/**
* 根据时间来判断,然后获得Job的状态,以此来进行监控 Job的启动时间和使用system.currentTimeMillis获得的时间是一致的,
*
*
* @return
* @throws IOException
*/
/**
 * Fetches all jobs from the cluster (finished, failed, or running) and returns
 * those whose start time is after {@link #jobStartTime}, wrapped as
 * {@link CurrentJobInfo} and sorted (via {@code Collections.sort}).
 * Job start times are comparable with {@code System.currentTimeMillis()}.
 *
 * @return the monitored jobs, sorted
 * @throws IOException if talking to the cluster fails
 */
public static List<CurrentJobInfo> getJobs() throws IOException {
    // hoist the client lookup out of the loop instead of calling getJobClient() per job
    JobClient client = getJobClient();
    JobStatus[] jss = client.getAllJobs();
    List<CurrentJobInfo> jsList = new ArrayList<CurrentJobInfo>();
    for (JobStatus js : jss) {
        // only jobs started after jobStartTime belong to the group being monitored
        if (js.getStartTime() > jobStartTime) {
            jsList.add(new CurrentJobInfo(client.getJob(js.getJobID()),
                    js.getStartTime(), js.getRunState()));
        }
    }
    Collections.sort(jsList);
    return jsList;
}
/**
 * Prints a one-line progress summary for the given job to stdout:
 * current date, job id, map/reduce progress, and run state.
 *
 * @param js the job status to print
 */
public static void printJobStatus(JobStatus js) {
    StringBuilder line = new StringBuilder();
    line.append(new java.util.Date())
        .append(":jobId:").append(js.getJobID().toString())
        .append(",map:").append(js.getMapProgress())
        .append(",reduce:").append(js.getReduceProgress())
        .append(",finish:").append(js.getRunState());
    System.out.println(line.toString());
}
/**
* @return the jobClient
*/
/**
 * Returns the shared {@link JobClient}, creating it on first use from
 * {@link #getConf()}.
 * <p>
 * NOTE(review): on an {@link IOException} the stack trace is printed and
 * {@code null} is returned (original behavior, preserved).
 *
 * @return the jobClient, or {@code null} if creation failed
 */
public static JobClient getJobClient() {
    if (jobClient != null) {
        return jobClient;
    }
    try {
        jobClient = new JobClient(getConf());
    } catch (IOException e) {
        e.printStackTrace();
    }
    return jobClient;
}
/**
* @param jobClient
* the jobClient to set
*/
/**
 * Replaces the shared {@link JobClient} instance.
 *
 * @param jobClient
 *            the jobClient to set
 */
public static void setJobClient(JobClient jobClient) {
HUtils.jobClient = jobClient;
}
/**
 * Returns the start-time threshold used by {@code getJobs()}; only jobs
 * started after this time are reported.
 *
 * @return the threshold in milliseconds (System.currentTimeMillis() scale)
 */
public static long getJobStartTime() {
return jobStartTime;
}
/**
 * Sets the start-time threshold used by {@code getJobs()} to filter out
 * jobs started before the current monitoring round.
 *
 * @param jobStartTime threshold in milliseconds (System.currentTimeMillis() scale)
 */
public static void setJobStartTime(long jobStartTime) {
HUtils.jobStartTime = jobStartTime;
}
/**
* 判断一组MR任务是否完成
*
* @param currentJobInfo
* @return
*/
public static String hasFinished(CurrentJobInfo currentJobInfo) {
if (currentJo
没有合适的资源?快使用搜索试试~ 我知道了~
温馨提示
基于Hadoop实现的好友推荐系统源码+文档说明(毕业设计),该项目是个人毕设项目,答辩评审分达到98分,代码都经过调试测试,确保可以运行!欢迎下载使用,可用于小白学习、进阶。该资源主要针对计算机、通信、人工智能、自动化等相关专业的学生、老师或从业者下载使用,亦可作为期末课程设计、课程大作业、毕业设计等。项目整体具有较高的学习借鉴价值!基础能力强的可以在此基础上修改调整,以实现不同的功能。 基于Hadoop实现的好友推荐系统源码+文档说明(毕业设计)基于Hadoop实现的好友推荐系统源码+文档说明(毕业设计)基于Hadoop实现的好友推荐系统源码+文档说明(毕业设计)基于Hadoop实现的好友推荐系统源码+文档说明(毕业设计)基于Hadoop实现的好友推荐系统源码+文档说明(毕业设计)基于Hadoop实现的好友推荐系统源码+文档说明(毕业设计)基于Hadoop实现的好友推荐系统源码+文档说明(毕业设计)基于Hadoop实现的好友推荐系统源码+文档说明(毕业设计)基于Hadoop实现的好友推荐系统源码+文档说明(毕业设计)基于Hadoop实现的好友推荐系统源码+文档说明(毕业设计)基于
资源推荐
资源详情
资源评论
收起资源包目录
基于Hadoop实现的好友推荐系统源码+文档说明(毕业设计) (2000个子文件)
HUtils.class 31KB
CloudAction.class 12KB
DBService.class 12KB
Utils.class 11KB
DrawPic.class 9KB
BaseDAOImpl.class 9KB
ClusterDataMapper.class 8KB
DeltaDistanceMapper.class 8KB
CalDistanceMapper.class 6KB
FindInitDCMapper.class 6KB
UserData.class 5KB
DeltaDistanceReducer.class 5KB
RunCluster2.class 4KB
CalDistanceJob.class 4KB
DBAction.class 4KB
FindInitDCJob.class 4KB
ClusterDataJob.class 4KB
LocalDensityJob.class 3KB
DoubleArrIntWritable.class 3KB
CurrentJobInfo.class 3KB
DoubleArrStrWritable.class 3KB
FindInitReducer.class 3KB
DeltaDistanceJob.class 3KB
LocalDensityMapper.class 3KB
RunCluster1.class 3KB
SortJob.class 3KB
DeduplicateReducer.class 3KB
TestService.class 3KB
SortJob$SortReducer.class 3KB
DoubleArrWritable.class 3KB
IntDoublePairWritable.class 3KB
LocalDensityReducer.class 3KB
CustomDoubleWritable.class 3KB
DeduplicateJob.class 3KB
CalDistanceReducer.class 3KB
IntPairWritable.class 2KB
DoublePairWritable.class 2KB
SortJob$SortMapper.class 2KB
BaseDAO.class 2KB
HConstants.class 2KB
DeduplicateMapper.class 2KB
LoginUser.class 2KB
UserGroup.class 2KB
StringPairWritable.class 2KB
LoginUserAction.class 2KB
ApplicationListenerImpl.class 2KB
IDistanceDensityMul.class 2KB
TestAction.class 2KB
CalDistance.class 1KB
Deduplicate.class 1KB
FilterCounter.class 1KB
ClusterCounter.class 1KB
SpringUtil.class 999B
CustomDoubleWritable$Comparator.class 857B
ObjectInterface.class 285B
.classpath 1KB
org.eclipse.wst.common.component 682B
org.eclipse.wst.jsdt.ui.superType.container 49B
icon.css 111KB
easyui.css 51KB
easyui.css 51KB
easyui.css 50KB
easyui.css 48KB
easyui.css 48KB
easyui.css 48KB
easyui.css 47KB
easyui.css 47KB
easyui.css 47KB
easyui.css 47KB
easyui.css 47KB
easyui.css 47KB
easyui.css 47KB
easyui.css 44KB
tabs.css 8KB
tabs.css 8KB
tabs.css 8KB
tabs.css 8KB
tabs.css 8KB
tabs.css 8KB
tabs.css 8KB
tabs.css 6KB
tabs.css 6KB
tabs.css 6KB
tabs.css 6KB
tabs.css 6KB
tabs.css 6KB
tabs.css 6KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
datagrid.css 5KB
共 2000 条
- 1
- 2
- 3
- 4
- 5
- 6
- 20
资源评论
yava_free
- 粉丝: 4776
- 资源: 1849
下载权益
C知道特权
VIP文章
课程特权
开通VIP
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功