package edu.zzti.knn;
import org.apache.commons.beanutils.BeanUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.*;
import java.lang.reflect.InvocationTargetException;
import java.util.*;
/**
* Created by wanglei on 2018/4/21.
*/
public class KnnDesign {
/** Maximum number of neighbour tuples kept per test record by the combiner and the reducer. */
private static final int K = 50;
/**
 * Test-set rows, filled by {@code TrainDistanceMapper.setup} and read by its {@code map} method.
 * Element 0 of each row is used as the record id (the map output key); the remaining
 * elements are the feature values used for the distance computation.
 */
private static final List<Float[]> testData = new ArrayList<>();
static class TrainDistanceMapper extends Mapper<LongWritable, Text, LongWritable, Tuple> {
    /** Job configuration key that may override the location of the test-set file. */
    private static final String TEST_PATH_KEY = "knn.test.path";
    /** Location used when {@link #TEST_PATH_KEY} is not set (the path that used to be hard-coded). */
    private static final String DEFAULT_TEST_PATH = "E:\\hdfs_test\\irisdata\\input\\test\\test.txt";

    /**
     * Loads the test set into the shared {@code testData} list.
     *
     * <p>The list is static, so it is filled only once per JVM: map tasks that share a JVM
     * (for example under the local job runner) would otherwise append the same rows again and
     * emit every distance several times. The rows are parsed into a temporary list first so a
     * failure never leaves a half-filled cache behind.
     *
     * @param context task context; its configuration may carry {@link #TEST_PATH_KEY}
     * @throws IOException if the test file cannot be read or contains a non-numeric field
     * @throws InterruptedException declared by the overridden method; not thrown here
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        String testPath = context.getConfiguration().get(TEST_PATH_KEY, DEFAULT_TEST_PATH);
        synchronized (testData) {
            if (testData.isEmpty()) {
                testData.addAll(readTestRows(testPath));
            }
        }
    }

    /**
     * Parses a comma-separated file of numeric rows; blank lines are skipped.
     *
     * @param testPath local file-system path of the test file
     * @return one {@code Float[]} per non-blank line, in file order
     * @throws IOException if the file cannot be read or a field is not a valid float
     */
    private static List<Float[]> readTestRows(String testPath) throws IOException {
        List<Float[]> rows = new ArrayList<>();
        // try-with-resources closes the reader (and the underlying stream) on every path.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(testPath), java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            int lineNo = 0;
            while ((line = reader.readLine()) != null) {
                lineNo++;
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] fields = line.split(",");
                Float[] features = new Float[fields.length];
                try {
                    for (int i = 0; i < features.length; i++) {
                        features[i] = Float.valueOf(fields[i]);
                    }
                } catch (NumberFormatException e) {
                    // Fail the task instead of silently classifying against a truncated test set.
                    throw new IOException(
                            "Malformed test record at " + testPath + ":" + lineNo + ": " + line, e);
                }
                rows.add(features);
            }
        }
        return rows;
    }

    /**
     * Emits, for one training record, its distance to every test record.
     *
     * <p>The last comma-separated field of the training line is the class label; all preceding
     * fields are feature values. The output key is the test record's id (element 0 of the test
     * row) and the output value is a {@code Tuple} of the training label and the distance.
     *
     * @param key input key supplied by the input format (unused)
     * @param value one line of the training set
     * @param context used to emit {@code <test id, (label, distance)>} pairs
     * @throws IOException if writing the output fails
     * @throws InterruptedException if writing the output is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] strs = value.toString().split(",");
        if (strs.length < 2) {
            // A blank or single-field line has no features; it would otherwise be emitted as a
            // neighbour at distance 0 (or fail on an empty split) and corrupt the vote.
            return;
        }
        String mark = strs[strs.length - 1];
        // Slot 0 is left empty so feature indices line up with the test rows, whose slot 0 is the id.
        Float[] trainFeatures = new Float[strs.length];
        for (int i = 1; i < trainFeatures.length; i++) {
            trainFeatures[i] = Float.valueOf(strs[i - 1]);
        }
        for (Float[] testFeatures : testData) {
            Float dist = getDistance(trainFeatures, testFeatures);
            Tuple tuple = new Tuple(mark, dist);
            context.write(new LongWritable(testFeatures[0].longValue()), tuple);
        }
    }

    /**
     * Computes the Euclidean distance between two rows, ignoring element 0 of each.
     *
     * @param trainFeatures training row; elements {@code 1..length-1} are features
     * @param testFeatures test row; must have at least as many elements as {@code trainFeatures}
     * @return the distance, narrowed to {@code float}
     * @throws IllegalArgumentException if the test row is shorter than the training row
     */
    private Float getDistance(Float[] trainFeatures, Float[] testFeatures) {
        if (testFeatures.length < trainFeatures.length) {
            throw new IllegalArgumentException("Test row has " + testFeatures.length
                    + " fields but the training row needs " + trainFeatures.length);
        }
        // Accumulate in double so precision is lost only once, at the final narrowing.
        double sum = 0;
        for (int i = 1; i < trainFeatures.length; i++) {
            double diff = testFeatures[i] - trainFeatures[i];
            sum += diff * diff;
        }
        return (float) Math.sqrt(sum);
    }
}
/**
 * Combiner that keeps at most {@code K} tuples per test record from the local map output.
 * A tuple that belongs to the global first {@code K} must also be among the first {@code K}
 * of its local group, so trimming here does not change the final result.
 */
static class TrainKCombiner extends Reducer<LongWritable,Tuple,LongWritable,Tuple>{
    /**
     * Forwards all tuples when there are at most {@code K}; otherwise sorts them with
     * {@code Tuple}'s natural ordering and forwards the first {@code K}.
     *
     * @param key id of the test record
     * @param values (label, distance) tuples for that test record
     * @param context used to emit the retained tuples
     * @throws IOException if a tuple cannot be copied or writing the output fails
     * @throws InterruptedException if writing the output is interrupted
     */
    @Override
    protected void reduce(LongWritable key, Iterable<Tuple> values, Context context) throws IOException, InterruptedException {
        List<Tuple> tuples = new ArrayList<>();
        for (Tuple current : values) {
            // Each value must be copied rather than stored by reference (Hadoop reuses the
            // value object while iterating).
            Tuple copy = new Tuple();
            try {
                BeanUtils.copyProperties(copy, current);
            } catch (IllegalAccessException | InvocationTargetException e) {
                // Dropping the tuple silently would distort the neighbour set; fail the task instead.
                throw new IOException("Failed to copy tuple for key " + key, e);
            }
            tuples.add(copy);
        }
        // Fewer than K candidates: nothing to trim.
        if (tuples.size() <= K) {
            for (Tuple tuple : tuples) {
                context.write(key, tuple);
            }
            return;
        }
        // NOTE(review): the original comment calls this a descending sort by distance; taking the
        // first K only yields the nearest neighbours if Tuple.compareTo puts the smallest
        // distance first - confirm against Tuple.
        Collections.sort(tuples);
        System.out.println("排序后"+key+":----");
        System.out.println(tuples);
        for (int i = 0; i < K; i++) {
            System.out.println(key+" combiner:"+tuples.get(i));
            context.write(key, tuples.get(i));
        }
    }
}
/**
 * Reducer that selects the global first {@code K} tuples for each test record and outputs
 * the label that occurs most often among them.
 */
static class TrainKReducder extends Reducer<LongWritable,Tuple,LongWritable,Text>{
    /**
     * Collects the tuples of one test record, trims them to the first {@code K} in
     * {@code Tuple}'s natural ordering, counts the labels and writes the most frequent one.
     * On a tie the label that sorts first as a string wins, because the counts are held in
     * a {@code TreeMap} and only a strictly larger count replaces the current winner.
     *
     * @param key id of the test record
     * @param values candidate (label, distance) tuples for that test record
     * @param context used to emit {@code <test id, predicted label>}
     * @throws IOException if a tuple cannot be copied or writing the output fails
     * @throws InterruptedException if writing the output is interrupted
     */
    @Override
    protected void reduce(LongWritable key, Iterable<Tuple> values, Context context) throws IOException, InterruptedException {
        List<Tuple> tuples = new ArrayList<>();
        for (Tuple current : values) {
            // Each value must be copied rather than stored by reference (Hadoop reuses the
            // value object while iterating).
            Tuple copy = new Tuple();
            try {
                BeanUtils.copyProperties(copy, current);
            } catch (IllegalAccessException | InvocationTargetException e) {
                // A silently dropped tuple would change the vote; fail the task instead.
                throw new IOException("Failed to copy tuple for key " + key, e);
            }
            tuples.add(copy);
        }
        if (tuples.size() > K) {
            // NOTE(review): the first K are the nearest neighbours only if Tuple.compareTo puts
            // the smallest distance first - confirm against Tuple.
            Collections.sort(tuples);
            tuples = new ArrayList<Tuple>(tuples.subList(0, K));
        }
        System.out.println(key+" reducer:"+tuples );
        // Count how many of the retained neighbours carry each label.
        Map<String,Integer> wordcount = new TreeMap<>();
        for (Tuple tuple : tuples) {
            String mark = tuple.getMark();
            Integer seen = wordcount.get(mark);
            wordcount.put(mark, seen == null ? 1 : seen + 1);
        }
        System.out.println("key :"+wordcount);
        // Pick the label with the highest count.
        int maxCount = 0;
        String label = "";
        for (Map.Entry<String, Integer> entry : wordcount.entrySet()) {
            if (entry.getValue() > maxCount) {
                maxCount = entry.getValue();
                label = entry.getKey();
            }
        }
        context.write(key, new Text(label));
    }
}
public static void main(String[] args) throws IOException, ClassNotFoundExcep
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
基于mapreduce的knn分类并行算法的实现.zip (105个子文件)
KnnDesign.java.bak 9KB
KnnDesign$TrainKReducder.class 5KB
KnnDesign$TrainDistanceMapper.class 4KB
KnnDesign$TrainKCombiner.class 4KB
KnnDesign.class 3KB
Tuple.class 2KB
KnnDesign.iml 10KB
KnnDesign.java 9KB
Tuple.java 1KB
mapreduce_test.log 677KB
log4j.properties 523B
log4j.properties 523B
workspace.xml 59KB
uiDesigner.xml 9KB
pom.xml 2KB
misc.xml 1005B
Maven__org_apache_hadoop_hadoop_mapreduce_client_jobclient_2_7_4.xml 745B
Maven__org_apache_directory_server_apacheds_kerberos_codec_2_0_0_M15.xml 743B
Maven__org_apache_hadoop_hadoop_mapreduce_client_shuffle_2_7_4.xml 731B
Maven__org_apache_hadoop_hadoop_mapreduce_client_common_2_7_4.xml 724B
Maven__org_apache_hadoop_hadoop_mapreduce_client_core_2_7_4.xml 710B
Maven__org_apache_hadoop_hadoop_mapreduce_client_app_2_7_4.xml 703B
Maven__org_apache_hadoop_hadoop_yarn_server_common_2_7_4.xml 689B
Maven__org_apache_directory_server_apacheds_i18n_2_0_0_M15.xml 673B
Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml 668B
Maven__org_apache_htrace_htrace_core_3_1_0_incubating.xml 668B
Maven__commons_configuration_commons_configuration_1_6.xml 663B
Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_13.xml 659B
Maven__commons_collections_commons_collections_3_2_2.xml 655B
Maven__org_apache_directory_api_api_asn1_api_1_0_0_M20.xml 654B
Maven__org_codehaus_jackson_jackson_core_asl_1_9_13.xml 645B
Maven__org_apache_hadoop_hadoop_yarn_common_2_7_4.xml 640B
Maven__org_apache_hadoop_hadoop_annotations_2_7_4.xml 640B
Maven__org_apache_hadoop_hadoop_yarn_client_2_7_4.xml 640B
Maven__org_apache_curator_curator_framework_2_7_1.xml 637B
Maven__com_google_inject_extensions_guice_servlet_3_0.xml 635B
Maven__commons_beanutils_commons_beanutils_1_7_0.xml 633B
Maven__org_apache_commons_commons_compress_1_4_1.xml 630B
Maven__org_fusesource_leveldbjni_leveldbjni_all_1_8.xml 630B
Maven__commons_httpclient_commons_httpclient_3_1.xml 630B
compiler.xml 630B
Maven__org_mortbay_jetty_jetty_sslengine_6_1_26.xml 626B
Maven__org_apache_directory_api_api_util_1_0_0_M20.xml 626B
Maven__org_apache_curator_curator_recipes_2_7_1.xml 623B
Maven__org_apache_hadoop_hadoop_yarn_api_2_7_4.xml 619B
Maven__com_jamesmurty_utils_java_xmlbuilder_0_4.xml 617B
Maven__org_codehaus_jackson_jackson_jaxrs_1_8_3.xml 617B
Maven__org_apache_httpcomponents_httpclient_4_1_2.xml 616B
Maven__org_apache_curator_curator_client_2_7_1.xml 616B
Maven__com_google_protobuf_protobuf_java_2_5_0.xml 613B
Maven__commons_logging_commons_logging_1_1_3.xml 611B
Maven__org_apache_commons_commons_math3_3_1_1.xml 609B
Maven__com_sun_jersey_contribs_jersey_guice_1_9.xml 608B
Maven__commons_digester_commons_digester_1_8.xml 608B
Maven__commons_daemon_commons_daemon_1_0_13.xml 607B
Maven__org_xerial_snappy_snappy_java_1_0_4_1.xml 605B
Maven__org_apache_hadoop_hadoop_client_2_7_4.xml 605B
Maven__org_apache_hadoop_hadoop_common_2_7_4.xml 605B
Maven__org_apache_httpcomponents_httpcore_4_1_2.xml 602B
Maven__com_thoughtworks_paranamer_paranamer_2_3.xml 599B
Maven__org_codehaus_jackson_jackson_xc_1_8_3.xml 596B
Maven__org_apache_hadoop_hadoop_hdfs_2_7_4.xml 591B
Maven__org_apache_hadoop_hadoop_auth_2_7_4.xml 591B
Maven__org_mortbay_jetty_jetty_util_6_1_26.xml 591B
Maven__io_netty_netty_all_4_0_23_Final.xml 590B
Maven__org_apache_zookeeper_zookeeper_3_4_6.xml 589B
Maven__com_sun_xml_bind_jaxb_impl_2_2_3_1.xml 587B
Maven__com_google_code_findbugs_jsr305_3_0_0.xml 584B
Maven__org_slf4j_slf4j_log4j12_1_7_10.xml 580B
Maven__com_sun_jersey_jersey_client_1_9.xml 579B
Maven__com_sun_jersey_jersey_server_1_9.xml 579B
Maven__commons_codec_commons_codec_1_4.xml 575B
Maven__org_codehaus_jettison_jettison_1_1.xml 572B
Maven__org_hamcrest_hamcrest_core_1_3.xml 571B
Maven__javax_xml_stream_stax_api_1_0_2.xml 566B
Maven__javax_activation_activation_1_1.xml 566B
Maven__com_sun_jersey_jersey_json_1_9.xml 565B
Maven__com_sun_jersey_jersey_core_1_9.xml 565B
Maven__commons_lang_commons_lang_2_6.xml 564B
Maven__net_java_dev_jets3t_jets3t_0_9_0.xml 564B
Maven__javax_servlet_servlet_api_2_5.xml 561B
Maven__javax_xml_bind_jaxb_api_2_2_2.xml 558B
Maven__org_mortbay_jetty_jetty_6_1_26.xml 556B
Maven__io_netty_netty_3_6_2_Final.xml 555B
Maven__com_google_code_gson_gson_2_2_4.xml 554B
Maven__commons_cli_commons_cli_1_2.xml 553B
Maven__aopalliance_aopalliance_1_0.xml 553B
Maven__commons_net_commons_net_3_1.xml 553B
Maven__org_slf4j_slf4j_api_1_7_10.xml 552B
Maven__com_google_guava_guava_11_0_2.xml 552B
Maven__javax_inject_javax_inject_1.xml 550B
Maven__javax_servlet_jsp_jsp_api_2_1.xml 549B
Maven__commons_io_commons_io_2_4.xml 542B
Maven__xml_apis_xml_apis_1_3_04.xml 541B
Maven__xerces_xercesImpl_2_9_1.xml 540B
Maven__com_google_inject_guice_3_0.xml 535B
Maven__org_apache_avro_avro_1_7_4.xml 534B
Maven__com_jcraft_jsch_0_1_54.xml 521B
Maven__log4j_log4j_1_2_17.xml 508B
Maven__xmlenc_xmlenc_0_52.xml 505B
共 105 条
- 1
- 2
资源评论
博士僧小星
- 粉丝: 1922
- 资源: 5884
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功