package org.hadoop;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.fz.algorithm.hadoop.model.Text2vectorVo;
import util.hadoop.HadoopUtil;
/**
* transform the text to vector
* @author fansy
*
*/
/**
 * MapReduce driver that transforms delimited text records into Mahout
 * {@link RandomAccessSparseVector}s, written out as a SequenceFile of
 * {@code LongWritable -> VectorWritable} pairs.
 *
 * Input line format: numeric fields separated by a configurable delimiter
 * (configuration key {@code "split"}, default ","). Lines containing any
 * non-numeric field are skipped entirely.
 */
public class Text2VectorDriver {
	/** Configuration key carrying the field delimiter from driver to mapper. */
	private final static String SPLIT = "split";
	// Shared, immutable logger for the driver and its nested mapper/reducer.
	private static final Log log = LogFactory.getLog(Text2VectorDriver.class);

	/**
	 * Configures and synchronously runs the text-to-vector job.
	 *
	 * @param text2vectorVo carries input path, output path, and field delimiter
	 * @return {@code true} if the job completed successfully
	 * @throws IOException            on HDFS / job-submission failure
	 * @throws ClassNotFoundException if a job class cannot be resolved on the cluster
	 * @throws InterruptedException   if the wait for completion is interrupted
	 */
	public boolean run(Text2vectorVo text2vectorVo) throws IOException, ClassNotFoundException, InterruptedException {
		Configuration conf = new Configuration();
		// Hadoop 1.x property: point the client at the cluster's JobTracker.
		conf.set("mapred.job.tracker", HadoopUtil.getJobTracker());
		// Propagate the delimiter to the mapper via the job configuration.
		// NOTE(review): conf.set throws NPE if getSplit() returns null — presumed
		// non-null by the submitting web form; confirm against Text2vectorAction.
		conf.set(SPLIT, text2vectorVo.getSplit());
		Job job = new Job(conf, "text2vector with input" + text2vectorVo.getInput());
		job.setJarByClass(Text2VectorDriver.class);
		job.setOutputFormatClass(SequenceFileOutputFormat.class);
		job.setMapperClass(T2VMapper.class);
		job.setReducerClass(T2VReducer.class);
		job.setMapOutputKeyClass(LongWritable.class);
		job.setMapOutputValueClass(VectorWritable.class);
		job.setOutputKeyClass(LongWritable.class);
		job.setOutputValueClass(VectorWritable.class);
		FileInputFormat.addInputPath(job, new Path(text2vectorVo.getInput()));
		SequenceFileOutputFormat.setOutputPath(job, new Path(text2vectorVo.getOutput()));
		return job.waitForCompletion(true);
	}

	/**
	 * Parses each text line into a sparse vector keyed by the line's byte offset.
	 * Lines with any unparsable field are skipped (logged, not failed), so a few
	 * bad records do not abort the whole job.
	 */
	public static class T2VMapper extends Mapper<LongWritable, Text, LongWritable, VectorWritable> {
		// Field delimiter; overridden from the job configuration in setup().
		// NOTE(review): String.split treats this as a regex — delimiters such
		// as "|" or "." would need escaping; confirm callers only pass literals.
		private String split = ",";

		@Override
		public void setup(Context cxt) {
			String configured = cxt.getConfiguration().get(SPLIT);
			if (configured != null) {
				split = configured;
			}
		}

		@Override
		public void map(LongWritable key, Text line, Context cxt) throws IOException, InterruptedException {
			String[] fields = line.toString().split(split); // split data
			Vector vector = new RandomAccessSparseVector(fields.length);
			for (int i = 0; i < fields.length; i++) {
				try {
					vector.set(i, Double.parseDouble(fields[i]));
				} catch (NumberFormatException e) {
					// Skip the whole record: a partial vector would be misleading.
					log.warn("Skipping record at offset " + key + ": cannot parse field '"
							+ fields[i] + "' as double", e);
					return;
				}
			}
			cxt.write(key, new VectorWritable(vector));
		}
	}

	/**
	 * Identity reducer: forwards every vector unchanged. Present so the output
	 * passes through the reduce phase into a single SequenceFile layout.
	 */
	public static class T2VReducer extends Reducer<LongWritable, VectorWritable, LongWritable, VectorWritable> {
		@Override
		public void reduce(LongWritable key, Iterable<VectorWritable> values, Context context)
				throws IOException, InterruptedException {
			for (VectorWritable v : values) {
				context.write(key, v);
			}
		}
	}
}
没有合适的资源?快使用搜索试试~ 我知道了~
温馨提示
web 工程调用hadoop集群1.3版本,使用structs2框架,把WEB-INF/lib下面的hadoop-fz1.3.jar拷贝到hadoop集群的lib下面,然后就可以运行了,暂时只支持text2vector算法。具体参考http://blog.csdn.net/fansy1990中相应blog。
资源推荐
资源详情
资源评论
收起资源包目录
web 工程调用hadoop集群1.3 (106个子文件)
Text2VectorDriver$T2VMapper.class 4KB
SetupService.class 3KB
Text2VectorDriver.class 3KB
Text2VectorDriver$T2VReducer.class 2KB
SchemeAlgorithm.class 2KB
Text2vectorService.class 1KB
KMeansAction.class 1KB
Text2vectorAction.class 1KB
HadoopUtil.class 1KB
SetupVo.class 1KB
SetupAction.class 1KB
KMeansCluster.class 1KB
Text2vectorVo.class 950B
AlgorithmUtils.class 868B
IAlgorithm.class 189B
.classpath 599B
org.eclipse.wst.common.component 476B
org.eclipse.wst.jsdt.ui.superType.container 49B
bootstrap.css 117KB
font-awesome-ie7.min.css 30KB
buttons.css 26KB
font-awesome.min.css 19KB
bootstrap-theme.css 16KB
home.css 1KB
base.css 928B
register.css 598B
login.css 518B
.DS_Store 6KB
.DS_Store 6KB
fontawesome-webfont.eot 29KB
home.html 3KB
shouye.html 2KB
setup.html 1KB
text2vector.html 1KB
modifyInfo.html 592B
setup_error.html 501B
setup_success.html 452B
seq2text.html 447B
mahout-examples-0.7-job.jar 28.65MB
mahout-core-0.7-job.jar 10.83MB
hadoop-core-1.0.4.jar 3.75MB
hadoop-test-1.0.4.jar 2.53MB
mahout-core-0.7.jar 1.44MB
mahout-math-0.7.jar 1.26MB
freemarker-2.3.19.jar 909KB
struts2-core-2.3.15.1.jar 783KB
xwork-core-2.3.15.1.jar 625KB
javassist-3.11.0.GA.jar 600KB
log4j-1.2.17.jar 478KB
commons-lang3-3.1.jar 308KB
mahout-integration-0.7.jar 305KB
hadoop-tools-1.0.4.jar 281KB
mahout-examples-0.7.jar 252KB
ognl-3.0.6.jar 223KB
commons-io-2.0.1.jar 156KB
hadoop-examples-1.0.4.jar 139KB
commons-fileupload-1.3.jar 67KB
commons-logging-1.1.3.jar 61KB
asm-3.3.jar 43KB
asm-commons-3.3.jar 37KB
asm-tree-3.3.jar 21KB
hadoop-fz1.3.jar 14KB
hadoop-ant-1.0.4.jar 7KB
hadoop-minicluster-1.0.4.jar 413B
hadoop-client-1.0.4.jar 410B
Text2VectorDriver.java 3KB
SetupService.java 1KB
SchemeAlgorithm.java 1KB
Text2vectorService.java 1KB
Text2vectorAction.java 1021B
KMeansAction.java 968B
HadoopUtil.java 895B
SetupAction.java 800B
KMeansCluster.java 719B
SetupVo.java 613B
AlgorithmUtils.java 604B
Text2vectorVo.java 505B
IAlgorithm.java 286B
jQuery.js 90KB
bootstrap.js 57KB
buttons.js 3KB
home.js 945B
.jsdtscope 500B
setup.jsp 2KB
text2vector.jsp 2KB
canopy.jsp 1KB
kmeans.jsp 1KB
error.jsp 990B
running.jsp 962B
MANIFEST.MF 36B
.mymetadata 315B
org.eclipse.wst.jsdt.ui.superType.name 6B
FontAwesome.otf 49KB
com.genuitec.eclipse.j2eedt.core.prefs 1KB
org.eclipse.jdt.core.prefs 629B
org.eclipse.core.resources.prefs 162B
.project 2KB
log4j.properties 214B
log4j.properties 214B
fontawesome-webfont.svg 158KB
共 106 条
- 1
- 2
fansy1990
- 粉丝: 1664
- 资源: 60
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
- java学籍管理系统源码带本地搭建教程数据库 MySQL源码类型 WebForm
- php+mysql学生成绩查询(系统)
- Bazel 的 Go 规则.zip
- Android ListView下拉刷新 Demo.zip
- Python实现基于一维卷积神经网络(1D-CNN)的多元时间序列分类源码+文档说明
- java基于springBoot课程评价系统源码数据库 MySQL源码类型 WebForm
- 5G工业无线路由器说明书
- SunshineLife个人博客是基于django+mysql+layui的小型个人博客网站
- 5G终端拔号脚本(AT), 适用于MT5701
- 基于Python + Django的生鲜超市系统 调用alipay沙盒系统支付
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功
- 1
- 2
- 3
前往页