package dianxinProject;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.filecache.DistributedCache;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.*;
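/*
Step 2: behavior matching - joins the input records against the behavior
address rule table and outputs the matched data.
*/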
public class Step2_action_match {
/**
 * Comparator override that orders strings in reverse of their natural order,
 * so that keys with match level 3 all come before keys with match level 2...
 */
public static class tm_Comparator implements Comparator<String> {
    /** Shared reverse-natural-order delegate; evaluates {@code o2.compareTo(o1)}. */
    private static final Comparator<String> DESCENDING = Collections.<String>reverseOrder();

    @Override
    public int compare(String o1, String o2) {
        return DESCENDING.compare(o1, o2);
    }
}
/**
 * Map-only join of '|'-separated input records against the rule file shipped
 * through the distributed cache.
 *
 * <p>{@code setup} loads the rule file into memory grouped by first-level domain.
 * {@code map} looks up the record's domain, picks the first rule (in
 * {@code tm_Comparator} order) whose match address is contained in the record's
 * URL, and emits one joined line as the output key.
 */
public static class MyMap extends Mapper<LongWritable,Text,Text,NullWritable> {
    /** A rule line must provide field indices 0..5. */
    private static final int RULE_FIELD_COUNT = 6;
    /** An input record must provide field indices 0..2 (user id, domain, url). */
    private static final int RECORD_FIELD_COUNT = 3;
    /** Counter group used to report skipped lines instead of failing the task. */
    private static final String COUNTER_GROUP = "Step2_action_match";

    // Map<first-level domain, TreeMap<match level + "|" + match address, "behavior ID|is product|pre-purchase type">>.
    // Kept per mapper instance: a static map would be shared (and mutated) by every
    // mapper running in the same JVM.
    private final Map<String, TreeMap<String, String>> joinData = new HashMap<String, TreeMap<String, String>>();

    /** Reused output key to avoid allocating a new Text per emitted record. */
    private final Text outKey = new Text();

    /**
     * Loads the rule file into {@code joinData}.
     *
     * <p>Rule lines with fewer than six fields, or with an empty match address, are
     * skipped and counted under MALFORMED_RULES. An empty match address is skipped
     * because it would be contained in every URL.
     *
     * @throws IOException if no cache file is available or the rule file cannot be read
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Path[] paths = DistributedCache.getLocalCacheFiles(context.getConfiguration());
        if (paths == null || paths.length == 0) {
            throw new IOException("No distributed-cache file available; the rule file must be registered on the job");
        }
        // Only one file is cached, so the first entry is the rule file.
        // NOTE(review): FileReader decodes with the platform default charset - confirm the rule file's encoding.
        try (BufferedReader br = new BufferedReader(new FileReader(paths[0].toString()))) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] fields = line.split("\\|", -1);
                if (fields.length < RULE_FIELD_COUNT || fields[2].isEmpty()) {
                    context.getCounter(COUNTER_GROUP, "MALFORMED_RULES").increment(1);
                    continue;
                }
                // The first-level domain is the key of the outer map.
                String domainKey = fields[1];
                TreeMap<String, String> rules = joinData.get(domainKey);
                if (rules == null) {
                    rules = new TreeMap<String, String>(new tm_Comparator());
                    joinData.put(domainKey, rules);
                }
                // key   = match level | match address
                // value = behavior ID | is product | pre-purchase type
                rules.put(fields[3] + "|" + fields[2], fields[0] + "|" + fields[4] + "|" + fields[5]);
            }
        }
    }

    /**
     * Emits {@code behaviorId|userId|isProduct|url|prePurchaseType} for the first rule of
     * the record's domain whose match address occurs in the URL; emits nothing when the
     * domain has no rules or no rule matches.
     *
     * <p>Records with fewer than three fields are skipped and counted under MALFORMED_RECORDS.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split("\\|", -1);
        if (fields.length < RECORD_FIELD_COUNT) {
            context.getCounter(COUNTER_GROUP, "MALFORMED_RECORDS").increment(1);
            return;
        }
        String userId = fields[0];
        String domain = fields[1];
        String url = fields[2];

        TreeMap<String, String> rules = joinData.get(domain);
        if (rules == null) {
            return;
        }
        for (Map.Entry<String, String> rule : rules.entrySet()) {
            String ruleKey = rule.getKey();
            // The key is "level|address"; everything after the separator is the match address.
            String matchAddress = ruleKey.substring(ruleKey.indexOf('|') + 1);
            if (url.contains(matchAddress)) {
                // Limit -1 keeps trailing empty fields, so an empty last attribute cannot
                // shorten the array below three elements.
                String[] attrs = rule.getValue().split("\\|", -1);
                outKey.set(attrs[0] + "|" + userId + "|" + attrs[1] + "|" + url + "|" + attrs[2]);
                context.write(outKey, NullWritable.get());
                return;
            }
        }
    }
}
/**
 * Configures and runs the map-only behavior-matching job.
 *
 * <p>Optional arguments: {@code args[0]} rule file URI to put in the distributed cache,
 * {@code args[1]} input path, {@code args[2]} output path. When fewer than three
 * arguments are given, the previously hard-coded locations are used.
 *
 * <p>The JVM exits with status 0 when the job succeeds and 1 when it fails.
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    // Defaults reproduce the locations that were hard-coded before.
    String cacheFile = "/dx_proj/data/peizhi/t_dx_basic_msg_addr.txt";
    String inputPath = "file:/F:\\dx_proj\\output\\step11";
    String outputPath = "file:/F:\\dx_proj\\output\\step22";
    if (args != null && args.length >= 3) {
        cacheFile = args[0];
        inputPath = args[1];
        outputPath = args[2];
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.addCacheFile(new URI(cacheFile));
    job.setJarByClass(Step2_action_match.class);
    job.setMapperClass(MyMap.class);
    // Map-only job: mapper output is written directly as the job output.
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Propagate job failure to the caller instead of always exiting with status 0.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
没有合适的资源?快使用搜索试试~ 我知道了~
毕设项目-基于电信数据的大数据分析.zip
共193个文件
xml:176个
java:12个
scala:3个
1.该资源内容由用户上传,如若侵权请联系客服进行举报
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
2.虚拟产品一经售出概不退款(资源遇到问题,请及时私信上传者)
版权申诉
0 下载量 38 浏览量
2023-10-26
20:21:21
上传
评论
收藏 112KB ZIP 举报
温馨提示
matlab算法,毕设、课设程序,全部源码均已进行严格测试,可以直接运行! matlab算法,毕设、课设程序,全部源码均已进行严格测试,可以直接运行! matlab算法,毕设、课设程序,全部源码均已进行严格测试,可以直接运行! matlab算法,毕设、课设程序,全部源码均已进行严格测试,可以直接运行! matlab算法,毕设、课设程序,全部源码均已进行严格测试,可以直接运行! matlab算法,毕设、课设程序,全部源码均已进行严格测试,可以直接运行! matlab算法,毕设、课设程序,全部源码均已进行严格测试,可以直接运行! matlab算法,毕设、课设程序,全部源码均已进行严格测试,可以直接运行! matlab算法,毕设、课设程序,全部源码均已进行严格测试,可以直接运行! matlab算法,毕设、课设程序,全部源码均已进行严格测试,可以直接运行! matlab算法,毕设、课设程序,全部源码均已进行严格测试,可以直接运行! matlab算法,毕设、课设程序,全部源码均已进行严格测试,可以直接运行! matlab算法,毕设、课设程序,全部源码均已进行严格测试,可以直接运行! matlab算法,毕设、课设程序,全部源码均已进行严格测试,可以直接运行!
资源推荐
资源详情
资源评论
收起资源包目录
毕设项目-基于电信数据的大数据分析.zip (193个子文件)
dianxinProject.iml 18KB
Step2_action_match.java 5KB
Step9_user_portrait_count.java 4KB
Step10_order_buy_user.java 4KB
Step8_user_action_top.java 3KB
Step1_clearout.java 3KB
Step5_product_match.java 3KB
Step4_action_count.java 3KB
Step7_product_count.java 3KB
Step3_user_action_count.java 3KB
Step6_user_product_count.java 2KB
Tools.java 718B
Tffffg.java 0B
ss.md 37B
Step12_ALS.scala 4KB
Step14_join_rule.scala 3KB
Step15_join_rule_recommend.scala 3KB
workspace.xml 34KB
uiDesigner.xml 9KB
pom.xml 2KB
Maven__org_glassfish_jersey_containers_jersey_container_servlet_core_2_22_2.xml 729B
Maven__org_apache_hadoop_hadoop_mapreduce_client_jobclient_2_7_1.xml 694B
Maven__org_glassfish_jersey_containers_jersey_container_servlet_2_22_2.xml 694B
Maven__org_apache_directory_server_apacheds_kerberos_codec_2_0_0_M15.xml 692B
Maven__org_scala_lang_modules_scala_parser_combinators_2_11_1_0_1.xml 686B
Maven__com_fasterxml_jackson_module_jackson_module_scala_2_11_2_6_5.xml 682B
Maven__org_glassfish_hk2_external_aopalliance_repackaged_2_4_0_b34.xml 681B
Maven__org_apache_hadoop_hadoop_mapreduce_client_shuffle_2_7_1.xml 680B
Maven__com_fasterxml_jackson_module_jackson_module_paranamer_2_6_5.xml 675B
Maven__org_apache_hadoop_hadoop_mapreduce_client_common_2_7_1.xml 673B
Maven__org_apache_hadoop_hadoop_mapreduce_client_core_2_7_1.xml 659B
Maven__org_apache_hadoop_hadoop_mapreduce_client_app_2_7_1.xml 652B
Maven__org_apache_parquet_parquet_format_2_3_0_incubating.xml 642B
Maven__org_glassfish_jersey_bundles_repackaged_jersey_guava_2_22_2.xml 642B
Maven__org_apache_spark_spark_network_shuffle_2_11_2_1_2.xml 641B
compiler.xml 640B
Maven__org_apache_hadoop_hadoop_yarn_server_common_2_7_1.xml 638B
Maven__org_apache_spark_spark_network_common_2_11_2_1_2.xml 634B
Maven__com_fasterxml_jackson_core_jackson_annotations_2_6_5.xml 632B
Maven__org_glassfish_jersey_media_jersey_media_jaxb_2_22_2.xml 625B
Maven__org_apache_directory_server_apacheds_i18n_2_0_0_M15.xml 622B
Maven__commons_beanutils_commons_beanutils_core_1_8_0.xml 617B
Maven__org_apache_htrace_htrace_core_3_1_0_incubating.xml 617B
Maven__org_apache_spark_spark_mllib_local_2_11_2_1_2.xml 613B
Maven__commons_configuration_commons_configuration_1_6.xml 612B
Maven__com_fasterxml_jackson_core_jackson_databind_2_6_5.xml 611B
Maven__org_glassfish_hk2_external_javax_inject_2_4_0_b34.xml 611B
Maven__org_glassfish_hk2_osgi_resource_locator_1_0_1.xml 610B
Maven__org_codehaus_jackson_jackson_mapper_asl_1_9_13.xml 608B
Maven__commons_collections_commons_collections_3_2_1.xml 604B
Maven__org_apache_directory_api_api_asn1_api_1_0_0_M20.xml 603B
Maven__org_apache_spark_spark_streaming_2_11_2_1_2.xml 599B
Maven__javax_validation_validation_api_1_1_0_Final.xml 599B
Maven__org_codehaus_jackson_jackson_core_asl_1_9_13.xml 594B
Maven__org_glassfish_jersey_core_jersey_server_2_22_2.xml 593B
Maven__org_glassfish_jersey_core_jersey_client_2_22_2.xml 593B
Maven__org_glassfish_jersey_core_jersey_common_2_22_2.xml 593B
Maven__org_apache_spark_spark_launcher_2_11_2_1_2.xml 592B
Maven__org_apache_spark_spark_catalyst_2_11_2_1_2.xml 592B
Maven__io_dropwizard_metrics_metrics_graphite_3_1_2.xml 591B
Maven__net_sourceforge_f2j_arpack_combined_all_0_1.xml 590B
Maven__org_apache_hadoop_hadoop_yarn_client_2_7_1.xml 589B
Maven__org_apache_hadoop_hadoop_annotations_2_7_1.xml 589B
Maven__org_apache_hadoop_hadoop_yarn_common_2_7_1.xml 589B
Maven__org_apache_curator_curator_framework_2_7_1.xml 586B
Maven__javax_annotation_javax_annotation_api_1_2.xml 585B
Maven__com_fasterxml_jackson_core_jackson_core_2_6_5.xml 583B
Maven__org_codehaus_janino_commons_compiler_3_0_0.xml 583B
Maven__commons_beanutils_commons_beanutils_1_7_0.xml 582B
Maven__org_scala_lang_modules_scala_xml_2_11_1_0_1.xml 581B
Maven__org_fusesource_leveldbjni_leveldbjni_all_1_8.xml 579B
Maven__org_apache_commons_commons_compress_1_4_1.xml 579B
Maven__commons_httpclient_commons_httpclient_3_1.xml 579B
Maven__org_apache_parquet_parquet_encoding_1_8_1.xml 579B
Maven__org_apache_spark_spark_unsafe_2_11_2_1_2.xml 578B
Maven__org_apache_spark_spark_sketch_2_11_2_1_2.xml 578B
Maven__org_apache_spark_spark_graphx_2_11_2_1_2.xml 578B
Maven__org_json4s_json4s_jackson_2_11_3_2_11.xml 575B
Maven__org_apache_directory_api_api_util_1_0_0_M20.xml 575B
Maven__org_apache_curator_curator_recipes_2_7_1.xml 572B
Maven__org_apache_parquet_parquet_jackson_1_8_1.xml 572B
Maven__org_apache_spark_spark_mllib_2_11_2_1_2.xml 571B
Maven__org_spire_math_spire_macros_2_11_0_7_4.xml 570B
Maven__org_glassfish_hk2_hk2_locator_2_4_0_b34.xml 568B
Maven__org_apache_hadoop_hadoop_yarn_api_2_7_1.xml 568B
Maven__com_jamesmurty_utils_java_xmlbuilder_0_4.xml 566B
Maven__com_univocity_univocity_parsers_2_2_1.xml 566B
Maven__org_codehaus_jackson_jackson_jaxrs_1_8_3.xml 566B
Maven__javax_servlet_javax_servlet_api_3_1_0.xml 566B
Maven__org_apache_parquet_parquet_common_1_8_1.xml 565B
Maven__org_apache_parquet_parquet_hadoop_1_8_1.xml 565B
Maven__org_apache_commons_commons_crypto_1_0_0.xml 565B
Maven__org_apache_httpcomponents_httpclient_4_1_2.xml 565B
Maven__org_apache_parquet_parquet_column_1_8_1.xml 565B
Maven__org_apache_curator_curator_client_2_7_1.xml 565B
Maven__org_apache_xbean_xbean_asm5_shaded_4_4.xml 564B
Maven__org_apache_spark_spark_tags_2_11_2_1_2.xml 564B
Maven__org_apache_spark_spark_core_2_11_2_1_2.xml 564B
Maven__io_dropwizard_metrics_metrics_json_3_1_2.xml 563B
Maven__io_dropwizard_metrics_metrics_core_3_1_2.xml 563B
共 193 条
- 1
- 2
资源评论
天天501
- 粉丝: 606
- 资源: 4665
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功