基于Spark通过Web访问系统设计源码.zip资源-CSDN文库

共75个文件

js：37个

html：12个

css：12个

版权申诉

源码

毕业设计

课程设计

java

学习资料

133 浏览量 2024-01-15 00:37:14 上传评论收藏 2.33MB ZIP 举报

资源推荐

资源详情

资源评论

收起资源包目录

基于Spark通过Web访问系统设计源码.zip （75个子文件）

code_20105

datatables.html 8KB

profile.html 8KB

card.html 13KB

bootstrap-wysihtml5

bootstrap3-wysihtml5.min.css 2KB

maps.html 7KB

calender.js 3KB

popper.js 19KB

jquery.googlemap.js 3KB

dashboard3.js 2KB

tooltip.js 5KB

forms.js 2KB

widgets.js 2KB

dashboard.js 5KB

morris.js 2KB

dashboard2.js 3KB

chartjs.js 2KB

apexcharts.js 961KB

jquery.ui-sliders.js 1KB

echarts.js 7KB

formeditor.js 1KB

gmaps.js 65KB

chart.min.js 154KB

flot.js 3KB

barcharts.js 4KB

jvectormap.js 3KB

moment.min.js 50KB

jquery.min.js 85KB

dataclick.js 238B

scripts.js 4KB

othercharts.js 6KB

index2.html 12KB

css01

bootstrap.min.css 118KB

app.css 2KB

bootstrap

bootstrap.min.js 50KB

css

bootstrap.min.css 138KB

chartist

chart.chartist.js 5KB

chartist.css 14KB

chartist-plugin-tooltip.css 565B

chartist-plugin-tooltip.js 5KB

chartist.js 174KB

count-down

jquery.lwtCountdown-1.0.js 5KB

img

brand

logo.png 41KB

logo-tst.png 3KB

loader.svg 1KB

typing.svg 1KB

news

img15.jpg 1.45MB

avatar

avatar-1.jpeg.jpg 22KB

avatar-1.jpeg 22KB

favicon.ico 462B

spinner.svg 2KB

china.js 59KB

css

style.css 105KB

icons.css 770B

table.html 26KB

vector-map.html 52KB

echarts.min.js 468KB

bootstrap-datepicker

bootstrap-datepicker.css 17KB

bootstrap-datepicker.js 56KB

index.html 82KB

marker.html 2KB

爬虫代码

lib

util.py 15KB

crawler.py 14KB

worker.py 24KB

main.py 6KB

config.py 1KB

about.html 8KB

bootstrap-timepicker

bootstrap-timepicker.js 34KB

bootstrap-timepicker.min.css 3KB

index3.html 26KB

bootstrap-colorpicker

bootstrap-colorpicker.min.js 20KB

bootstrap-colorpicker.min.css 13KB

Chart.js

dist

Chart.bundle.js 429KB

bootstrap-daterangepicker

daterangepicker.css 8KB

daterangepicker.js 69KB

# -*- coding: utf-8 -*- import csv import datetime import json import math import time import redis from sshtunnel import SSHTunnelForwarder from lib.crawler import StatsGovCn from lib.util import DBUtilStatsGovCn def fetch_stats_gov_cn(url, db_path, show_log=True, sleep_time=0): """ 采集统计局信息 :param url: 统计局信息根网址 :type url: str :param db_path: SQLite数据库路径 :type db_path: str :param show_log: 是否显示日志 :type show_log: bool :param sleep_time: 爬虫每次爬取后的休眠时间，单位为秒。 :type sleep_time: int :return: """ # 程序开始时间 begin_time = time.time() stats_gov_cn_crawler = StatsGovCn() stats_gov_cn_crawler.sleep_time = sleep_time if stats_gov_cn_crawler.check(url.replace('$ROUTE$', 'index.html'))[0] != 'province': raise Exception('不是省级信息页面') # 数据库操作对象 db_util = DBUtilStatsGovCn(db_path + 'db_stats.gov.cn.sqlite') # 上级 URL 地址 url_base = {} url_base_temp = '' # 抓取并保存省级信息 if show_log: print(f'[Log][{datetime.datetime.now()}] 开始抓取并保存省级信息') provinces = stats_gov_cn_crawler.province(url.replace('$ROUTE$', 'index.html')) db_util.truncate_province() for province in provinces: db_util.insert_province(province['statistical_code'], province['code'], province['name']) url_base[province['statistical_code']] = url.replace('$ROUTE$', '') if show_log: print(f'[Log][{datetime.datetime.now()}] 完成抓取并保存省级信息') print(f'[REPORT] 省级信息 {len(provinces)} 个') # 抓取并保存地级信息 if show_log: print(f'[Log][{datetime.datetime.now()}] 开始抓取并保存地级信息') cities = [] db_util.truncate_city() for province in provinces: if show_log: province_name_temp = province['name'] print(f'[Log][{datetime.datetime.now()}] [{provinces.index(province) + 1}/{len(provinces)}] ' f'开始抓取并保存【{province_name_temp}】') if province['href'] != '': url_base_temp = url_base[province['statistical_code']] + province['href'] cities_temp = stats_gov_cn_crawler.city(url_base_temp) for city in cities_temp: cities.append(city) db_util.insert_city(city['statistical_code'], city['code'], city['name'], province['statistical_code']) url_base[city['statistical_code']] = url_base_temp[0:url_base_temp.rfind('/')+1] if show_log: province_name_temp = province['name'] print(f'[Log][{datetime.datetime.now()}] [{provinces.index(province) + 1}/{len(provinces)}] ' f'完成抓取并保存【{province_name_temp}】') if show_log: print(f'[Log][{datetime.datetime.now()}] 完成抓取并保存地级信息') print(f'[REPORT] 地级信息 {len(cities)} 个') # 抓取并保存县级信息 if show_log: print(f'[Log][{datetime.datetime.now()}] 开始抓取并保存县级信息') counties = [] db_util.truncate_county() for city in cities: city_db_temp = db_util.select_city(city['statistical_code']) if show_log: province_name_temp = db_util.select_province(city_db_temp['province_statistical_code'])['name'] city_name_temp = city['name'] print(f'[Log][{datetime.datetime.now()}] [{cities.index(city) + 1}/{len(cities)}] ' f'开始抓取并保存【{province_name_temp}】【{city_name_temp}】') if city['href'] != '': try: url_base_temp = url_base[city['statistical_code']] + city['href'] counties_temp = stats_gov_cn_crawler.county(url_base_temp) except Exception as e: if e.args[0] == '不是县级信息页面': counties_temp = [{ 'href': city['href'][city['href'].find('/')+1:], 'statistical_code': city['statistical_code'], 'code': city['statistical_code'][0:6], 'name': city['name'] }] else: raise e for county in counties_temp: counties.append(county) db_util.insert_county( county['statistical_code'], county['code'], county['name'], city_db_temp['province_statistical_code'], city['statistical_code'] ) url_base[county['statistical_code']] = url_base_temp[0:url_base_temp.rfind('/')+1] if show_log: province_name_temp = db_util.select_province(city_db_temp['province_statistical_code'])['name'] city_name_temp = city['name'] print(f'[Log][{datetime.datetime.now()}] [{cities.index(city) + 1}/{len(cities)}] ' f'完成抓取并保存【{province_name_temp}】【{city_name_temp}】') if show_log: print(f'[Log][{datetime.datetime.now()}] 完成抓取并保存县级信息') print(f'[REPORT] 县级信息 {len(counties)} 个') # 抓取并保存乡级信息 if show_log: print(f'[Log][{datetime.datetime.now()}] 开始抓取并保存乡级信息') towns = [] db_util.truncate_town() for county in counties: county_db_temp = db_util.select_county(county['statistical_code']) if show_log: province_name_temp = db_util.select_province(county_db_temp['province_statistical_code'])['name'] city_name_temp = db_util.select_city(county_db_temp['city_statistical_code'])['name'] county_name_temp = county['name'] print(f'[Log][{datetime.datetime.now()}] [{counties.index(county) + 1}/{len(counties)}] ' f'开始抓取并保存【{province_name_temp}】【{city_name_temp}】【{county_name_temp}】') if county['href'] != '': try: url_base_temp = url_base[county['statistical_code']] + county['href'] towns_temp = stats_gov_cn_crawler.town(url_base_temp) except Exception as e: if e.args[0] == '不是乡级信息页面': towns_temp = [{ 'href': county['href'][county['href'].find('/')+1:], 'statistical_code': county['statistical_code'], 'code': county['statistical_code'][0:9], 'name': county['name'] }] else: raise e for town in towns_temp: towns.append(town) db_util.insert_town( town['statistical_code'], town['code'], town['name'], county_db_temp['province_statistical_code'], county_db_temp['city_statistical_code'], county['statistical_code'] ) url_base[town['statistical_code']] = url_base_temp[0:url_base_temp.rfind('/')+1] if show_log: province_name_temp = db_util.select_province(county_db_temp['province_statistical_code'])['name'] city_name_temp = db_util.select_city(county_db_temp['city_statistical_code'])['name'] county_name_temp = county['name'] print(f'[Log][{datetime.datetime.now()}] [{counties.index(county) + 1}/{len(counties)}] ' f'完成抓取并保存【{province_name_temp}】【{city_name_temp}】【{county_name_temp}】') if show_log: print(f'[Log][{datetime.datetime.now()}] 完成抓取并保存乡级信息') print(f'[REPORT] 乡级信息 {len(towns)} 个') # 抓取并保存村级信息 if show_log: print(f'[Log][{

评论收藏

内容反馈

版权申诉