-- ===== javajob1: data cleaning =====
-- Salary cleaning: keep the lower bound of each salary range, i.e. the text
-- before the first "-" in javajob1.money.
create table Money_source(money int);

-- The previous statement wrapped a subquery in sort_array(array(...)) and ended
-- with an empty FROM clause, which does not parse. A plain INSERT ... SELECT is
-- all that is needed here.
-- substring_index() returns a string, so the conversion to the int column is
-- made explicit. Values that are not purely numeric become NULL.
-- NOTE(review): assumes money is formatted as low-high with bare numbers - confirm against the data.
insert into table Money_source
select cast(substring_index(money, '-', 1) as int)
from javajob1;
-- Experience cleaning.
-- background_source holds the minimum required years of experience per posting.
create table background_source(background int);
-- background holds the final (category label, posting count) summary rows.
create table background(background varchar(30), number int);

-- Take the lower bound of the experience range (text before the first "-"),
-- but only for postings that actually state a requirement. Postings marked
-- as having no requirement are counted separately from javajob1 and are
-- skipped here instead of being loaded as NULL rows.
-- The cast makes the string-to-int conversion explicit. Non-numeric values become NULL.
insert into table background_source
select cast(substring_index(jiangyan, '-', 1) as int)
from javajob1
where jiangyan not like '%经验不限%';
-- ===== javajob2: data cleaning =====
-- new_JAVAjob2 keeps only the postings whose title mentions Java.
-- Column names (eduaction, adress) are kept as spelled because the later
-- statements reference them under these names.
create external table new_JAVAjob2 (
    jobname   string,
    name      string,
    jingyan   string,
    eduaction string,
    money     string,
    adress    string,
    tags      string,
    times     string
)
row format delimited
fields terminated by ',';

-- LIKE is case-sensitive in Hive, so the title is lower-cased first. Otherwise
-- titles written as JAVA or java would be dropped from every later statistic.
insert into table new_javajob2
select
    jobname,
    name,
    jingyan,
    eduaction,
    money,
    adress,
    tags,
    times
from javajob2
where lower(jobname) like '%java%';
-- ===== javajob1: aggregation =====
-- A. Experience
-- 1. Number of postings with no experience requirement.
insert into table background
select
    '经验不限',
    count(*)
from javajob1
where jiangyan like '%经验不限%';

-- 2. Number of postings whose minimum experience is above 4 years.
insert into table background
select
    '五年以上',
    count(*)
from background_source
where background > 4;

-- 3. Number of postings whose minimum experience is at most 3 years.
-- NOTE(review): a minimum of exactly 4 years falls into neither bucket - confirm this is intended.
insert into table background
select
    '三年以下',
    count(*)
from background_source
where background <= 3;
-- B. Salary
-- money holds (statistic label, value) rows computed from the cleaned salary
-- lower bounds in money_source.
create table money(source varchar(30), number int);

-- Two fixes compared with the previous version:
--   * money_source has a single column named money. The old queries read a
--     column called source, which does not exist in that table.
--   * only the first SELECT was attached to the INSERT. The minimum and the
--     mean were merely printed. Each statistic now has its own INSERT.
insert into table money
select '最大值', max(money)
from money_source;

insert into table money
select '最小值', min(money)
from money_source;

-- avg() yields a double while the target column is int, so round explicitly
-- instead of relying on implicit truncation.
insert into table money
select '均值', cast(round(avg(money)) as int)
from money_source;
-- C. Education
-- education holds (education level label, posting count) rows.
create table education(education varchar(30), number int);

-- Bachelor degree.
insert into table education
select
    '本科',
    count(*)
from javajob1
where education like '%本科%';

-- Junior college.
insert into table education
select
    '大专',
    count(*)
from javajob1
where education like '%大专%';

-- No education requirement.
insert into table education
select
    '学历不限',
    count(*)
from javajob1
where education like '%学历不限%';

-- Everything else: rows matching none of the three patterns above.
-- Rows with a NULL education are not counted in any bucket.
insert into table education
select
    '其他',
    count(*)
from javajob1
where education not like '%本科%'
  and education not like '%大专%'
  and education not like '%学历不限%';
-- ===== javajob2: aggregation =====
-- A. Postings per city
-- JAVA_chengshitongji holds (city label, posting count) rows for five cities.
create table JAVA_chengshitongji(address varchar(30), jobnumber int);

-- Chengdu
insert into table JAVA_chengshitongji
select
    '成都',
    count(*)
from new_javajob2
where adress like '%成都%';

-- Shanghai
insert into table JAVA_chengshitongji
select
    '上海',
    count(*)
from new_javajob2
where adress like '%上海%';

-- Beijing
insert into table JAVA_chengshitongji
select
    '北京',
    count(*)
from new_javajob2
where adress like '%北京%';

-- Guangzhou
insert into table JAVA_chengshitongji
select
    '广州',
    count(*)
from new_javajob2
where adress like '%广州%';

-- Shenzhen
insert into table JAVA_chengshitongji
select
    '深圳',
    count(*)
from new_javajob2
where adress like '%深圳%';
-- B. Postings per position level
-- JAVA_zhiweitongji holds (position category label, posting count) rows.
-- Categories are assigned by substring match on the job title, so a title
-- containing several keywords is counted once in each matching category.
create table JAVA_zhiweitongji(position_category varchar(30), jobnumber int);

-- Senior developer
insert into table JAVA_zhiweitongji
select
    '高级开发师',
    count(*)
from new_javajob2
where jobname like '%高级开发%';

-- Mid-level developer
insert into table JAVA_zhiweitongji
select
    '中级开发师',
    count(*)
from new_javajob2
where jobname like '%中级开发%';

-- Junior developer. The pattern here is the bare level keyword, unlike the two above.
insert into table JAVA_zhiweitongji
select
    '初级开发师',
    count(*)
from new_javajob2
where jobname like '%初级%';

-- Architect
insert into table JAVA_zhiweitongji
select
    '架构师',
    count(*)
from new_javajob2
where jobname like '%架构%';
-- C. Postings per city for each position level
-- Senior developers
-- gaoji_chengshi holds (city label, posting count) rows for senior developer postings.
create table gaoji_chengshi(address varchar(30), jobnumber int);

-- Previously a single INSERT was followed by five SELECTs, so only the first
-- city was stored and the other four results were just printed. Every city
-- now has its own INSERT.
insert into table gaoji_chengshi
select '成都', count(adress) from new_javajob2 where jobname like '%高级开发%' and adress like '%成都%';

insert into table gaoji_chengshi
select '北京', count(adress) from new_javajob2 where jobname like '%高级开发%' and adress like '%北京%';

insert into table gaoji_chengshi
select '上海', count(adress) from new_javajob2 where jobname like '%高级开发%' and adress like '%上海%';

insert into table gaoji_chengshi
select '广州', count(adress) from new_javajob2 where jobname like '%高级开发%' and adress like '%广州%';

insert into table gaoji_chengshi
select '深圳', count(adress) from new_javajob2 where jobname like '%高级开发%' and adress like '%深圳%';
-- Mid-level developers
-- zhongji_chengshi holds (city label, posting count) rows for mid-level developer postings.
create table zhongji_chengshi(address varchar(30), jobnumber int);

-- Previously a single INSERT was followed by five SELECTs, so only the first
-- city was stored and the other four results were just printed. Every city
-- now has its own INSERT.
insert into table zhongji_chengshi
select '成都', count(adress) from new_javajob2 where jobname like '%中级开发%' and adress like '%成都%';

insert into table zhongji_chengshi
select '北京', count(adress) from new_javajob2 where jobname like '%中级开发%' and adress like '%北京%';

insert into table zhongji_chengshi
select '上海', count(adress) from new_javajob2 where jobname like '%中级开发%' and adress like '%上海%';

insert into table zhongji_chengshi
select '广州', count(adress) from new_javajob2 where jobname like '%中级开发%' and adress like '%广州%';

insert into table zhongji_chengshi
select '深圳', count(adress) from new_javajob2 where jobname like '%中级开发%' and adress like '%深圳%';
-- Junior developers
-- diji_chengshi holds (city label, posting count) rows for junior postings.
create table diji_chengshi(address varchar(30), jobnumber int);

-- Previously a single INSERT was followed by five SELECTs, so only the first
-- city was stored and the other four results were just printed. Every city
-- now has its own INSERT.
insert into table diji_chengshi
select '成都', count(adress) from new_javajob2 where jobname like '%初级%' and adress like '%成都%';

insert into table diji_chengshi
select '北京', count(adress) from new_javajob2 where jobname like '%初级%' and adress like '%北京%';

insert into table diji_chengshi
select '上海', count(adress) from new_javajob2 where jobname like '%初级%' and adress like '%上海%';

insert into table diji_chengshi
select '广州', count(adress) from new_javajob2 where jobname like '%初级%' and adress like '%广州%';

insert into table diji_chengshi
select '深圳', count(adress) from new_javajob2 where jobname like '%初级%' and adress like '%深圳%';
-- Architects
-- jiagou_chengshi holds (city label, posting count) rows for architect postings.
create table jiagou_chengshi(address varchar(30), jobnumber int);

-- Previously a single INSERT was followed by five SELECTs, so only the first
-- city was stored and the other four results were just printed. Every city
-- now has its own INSERT.
insert into table jiagou_chengshi
select '成都', count(adress) from new_javajob2 where jobname like '%架构%' and adress like '%成都%';

insert into table jiagou_chengshi
select '北京', count(adress) from new_javajob2 where jobname like '%架构%' and adress like '%北京%';

insert into table jiagou_chengshi
select '上海', count(adress) from new_javajob2 where jobname like '%架构%' and adress like '%上海%';

insert into table jiagou_chengshi
select '广州', count(adress) from new_javajob2 where jobname like '%架构%' and adress like '%广州%';

insert into table jiagou_chengshi
select '深圳', count(adress) from new_javajob2 where jobname like '%架构%' and adress like '%深圳%';
# ===== Export the Hive result tables to MySQL with Sqoop (shell commands) =====
# money
# Reads the files under the Hive warehouse directory of table money and writes
# them to the MySQL table money in database jobs.
# -P prompts for the MySQL password at run time. The password used to be passed
# in clear text on the command line, where it is visible in this file, in shell
# history and in the process list. It should be rotated.
# The JDBC URL is quoted so that "&" needs no backslash escaping.
# '\001' (Ctrl-A) is the field delimiter expected in the exported files.
bin/sqoop export \
--connect "jdbc:mysql://hadoop004:3306/jobs?useUnicode=true&characterEncoding=utf-8" \
--username root \
-P \
--table money \
--columns "source,number" \
--fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/money \
--num-mappers 1
# background
# Reads the files under the Hive warehouse directory of table background and
# writes them to the MySQL table background in database jobs.
# -P prompts for the MySQL password at run time instead of exposing it in clear
# text on the command line. The previously committed password should be rotated.
# The JDBC URL is quoted so that "&" needs no backslash escaping.
bin/sqoop export \
--connect "jdbc:mysql://hadoop004:3306/jobs?useUnicode=true&characterEncoding=utf-8" \
--username root \
-P \
--table background \
--columns "background,number" \
--fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/background \
--num-mappers 1
# education
# Reads the files under the Hive warehouse directory of table education and
# writes them to the MySQL table education in database jobs.
# -P prompts for the MySQL password at run time instead of exposing it in clear
# text on the command line. The previously committed password should be rotated.
# The JDBC URL is quoted so that "&" needs no backslash escaping.
bin/sqoop export \
--connect "jdbc:mysql://hadoop004:3306/jobs?useUnicode=true&characterEncoding=utf-8" \
--username root \
-P \
--table education \
--columns "education,number" \
--fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/education \
--num-mappers 1
# JAVA_chengshitongji
# NOTE(review): the next line is SQL, not a shell command. It is presumably the
# DDL for the MySQL-side target table (the Hive table of the same name is
# created earlier in this file) and has to be run in a MySQL client - confirm.
create table JAVA_chengshitongji(address varchar(30),jobnumber int);
# Shell: export the Hive warehouse files of java_chengshitongji to that table.
# -P prompts for the MySQL password at run time instead of exposing it in clear
# text on the command line. The previously committed password should be rotated.
# The JDBC URL is quoted so that "&" needs no backslash escaping.
bin/sqoop export \
--connect "jdbc:mysql://hadoop004:3306/jobs?useUnicode=true&characterEncoding=utf-8" \
--username root \
-P \
--table JAVA_chengshitongji \
--columns "address,jobnumber" \
--fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/java_chengshitongji \
--num-mappers 1
# JAVA_zhiweitongji
# NOTE(review): the next line is SQL, not a shell command. It is presumably the
# DDL for the MySQL-side target table (the Hive table of the same name is
# created earlier in this file) and has to be run in a MySQL client - confirm.
create table JAVA_zhiweitongji(position_category varchar(30),jobnumber int);
# Shell: export the Hive warehouse files of java_zhiweitongji to that table.
# -P prompts for the MySQL password at run time instead of exposing it in clear
# text on the command line. The previously committed password should be rotated.
# The JDBC URL is quoted so that "&" needs no backslash escaping.
# NOTE(review): the original command was cut off after "/user/hive/warehouse/jav".
# The directory name and the mapper count below are reconstructed by analogy
# with the JAVA_chengshitongji export - confirm before running.
bin/sqoop export \
--connect "jdbc:mysql://hadoop004:3306/jobs?useUnicode=true&characterEncoding=utf-8" \
--username root \
-P \
--table JAVA_zhiweitongji \
--columns "position_category,jobnumber" \
--fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/java_zhiweitongji \
--num-mappers 1
没有合适的资源?快使用搜索试试~ 我知道了~
资源详情
资源评论
资源推荐
收起资源包目录
基于互联网招聘信息统计与分析 (706个子文件)
scrapy.cfg 257B
scrapy.cfg 255B
GoodsEntity.class 2KB
GoodsEntity.class 2KB
GoodsEntity.class 2KB
GoodsEntity.class 2KB
GoodsEntity.class 2KB
GoodsController.class 927B
GoodsController.class 925B
GoodsController.class 918B
GoodsController.class 917B
GoodsController.class 913B
DemoApplication.class 733B
DemoApplication.class 733B
DemoApplication.class 733B
DemoApplication.class 733B
DemoApplication.class 733B
GoodsServicelmpl.class 725B
GoodsServicelmpl.class 725B
GoodsServicelmpl.class 725B
GoodsServicelmpl.class 725B
GoodsServicelmpl.class 725B
DemoApplicationTests.class 531B
DemoApplicationTests.class 531B
DemoApplicationTests.class 531B
DemoApplicationTests.class 531B
DemoApplicationTests.class 531B
GoodsDao.class 383B
GoodsDao.class 383B
GoodsDao.class 383B
GoodsDao.class 383B
GoodsDao.class 383B
GoodsService.class 322B
GoodsService.class 322B
GoodsService.class 322B
GoodsService.class 322B
GoodsService.class 322B
mvnw.cmd 6KB
mvnw.cmd 6KB
mvnw.cmd 6KB
mvnw.cmd 6KB
mvnw.cmd 6KB
mvnw.cmd 6KB
mvnw.cmd 6KB
mvnw.cmd 6KB
mvnw.cmd 6KB
mvnw.cmd 6KB
qiancheng.csv 8.73MB
qiancheng.csv 8.73MB
boss.csv 57KB
boss.csv 57KB
.gitignore 395B
.gitignore 395B
.gitignore 395B
.gitignore 395B
.gitignore 395B
.gitignore 395B
.gitignore 395B
.gitignore 395B
.gitignore 395B
.gitignore 395B
.gitignore 50B
.gitignore 50B
.gitignore 50B
.gitignore 50B
.gitignore 50B
.gitignore 50B
.gitignore 50B
index.html 9KB
index.html 9KB
index.html 4KB
index.html 4KB
index.html 4KB
index.html 4KB
index.html 4KB
index.html 4KB
index.html 2KB
index.html 2KB
JAVA_chengshitongji.iml 10KB
Mysql-Javajobs.iml 10KB
Mysql-Javajobs.iml 10KB
Mysql-Javajobs.iml 10KB
zhiweitongji.iml 10KB
demo.iml 8KB
demo.iml 8KB
demo.iml 8KB
demo.iml 8KB
demo.iml 8KB
gongzuo.iml 291B
spider.iml 291B
maven-wrapper.jar 50KB
maven-wrapper.jar 50KB
maven-wrapper.jar 50KB
maven-wrapper.jar 50KB
maven-wrapper.jar 50KB
maven-wrapper.jar 50KB
maven-wrapper.jar 50KB
maven-wrapper.jar 50KB
maven-wrapper.jar 50KB
maven-wrapper.jar 50KB
共 706 条
- 1
- 2
- 3
- 4
- 5
- 6
- 8
平平无奇秃头小天才
- 粉丝: 117
- 资源: 4
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功
评论0