#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright 2012 Channing Wong
#
# @mail: channing.wong@yahoo.com
# @home: http://blog.3363.me/
# @date: Mar 3, 2012
#
import json
import sys
import time
import types
import urllib
# Python 2 only: make implicit str <-> unicode conversions use UTF-8 so the
# non-ASCII keyword and result text can be formatted and written without
# explicit encode() calls.  sys.setdefaultencoding() is removed from ``sys``
# at interpreter start-up, hence the reload() to get it back.
# Python 3 has no reload() builtin and its default encoding is already
# UTF-8, so the hack is skipped there instead of raising NameError.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding('utf-8')
class BaiduMap:
    """Download map.baidu.com search results for a keyword into a text file.

    Creating an instance opens ``<keyword>.txt`` for writing and sends one
    initial query to find the cities that have results for the keyword
    (stored in ``self.city``) and the summed result count
    (``self.total_num``).  Call :meth:`get_all` afterwards to fetch every
    city's results page by page; each result is written as one
    tab-separated line ``city name, name, address, telephone``.

    NOTE(review): the meaning of most query-string fields below is not
    visible in this file; they are reproduced verbatim from the original
    request and should be checked against the service if it changes.
    """

    def __init__(self, keyword):
        """Open the output file and discover the cities for ``keyword``.

        Exits the process (via ``_get_city``) when the initial query does
        not return a list of cities.
        """
        self.keyword = keyword
        # Parameters of the initial, city-discovery query.
        self.query = [
            ('b', '(-1599062.039999999,811604.75;24779177.96,8168020.75)'),
            ('c', '1'),
            ('from', 'webmap'),
            ('ie', 'utf-8'),
            ('l', '4'),
            ('newmap', '1'),
            ('qt', 's'),
            ('src', '0'),
            ('sug', '0'),
            ('t', int(time.time())),
            ('tn', 'B_NORMAL_MAP'),
            ('wd', keyword),
            ('wd2', '')
        ]
        self.mapurl = 'http://map.baidu.com/'
        self.file = open('%s.txt' % keyword, 'w')
        self.count = 0      # results written so far, all cities
        self.count_c = 0    # results written so far, current city
        self.total_num = 0  # sum of the per-city 'num' values
        try:
            self._get_city()
        except BaseException:
            # Includes the SystemExit raised by _get_city(): do not leave
            # the output file open when construction fails.
            self.file.close()
            raise

    def _fetch(self, query=None, json=True):
        """GET ``self.mapurl`` with ``query`` and return the response body.

        ``query`` is a sequence of ``(key, value)`` pairs (``None`` means
        no parameters).  When ``json`` is true the body is parsed with
        :meth:`_tojson` (so ``None`` is returned for an unparsable body);
        otherwise the raw body is returned.  Note that the ``json``
        parameter shadows the ``json`` module inside this method.
        """
        # Imported here because the names live in different modules on
        # Python 2 and Python 3.
        try:
            from urllib.parse import urlencode
            from urllib.request import urlopen
        except ImportError:  # Python 2
            from urllib import urlencode, urlopen
        url = self.mapurl + '?' + urlencode(query or [])
        response = urlopen(url)
        try:
            data = response.read()
        finally:
            response.close()
        if json:
            return self._tojson(data)
        return data

    def _tojson(self, data):
        """Parse ``data`` (UTF-8 bytes or text) as JSON.

        Returns the decoded object, or ``None`` when ``data`` is not valid
        UTF-8 / JSON or is not a string at all.
        """
        try:
            if isinstance(data, bytes):
                data = data.decode('utf-8')
            return json.loads(data)
        except (TypeError, ValueError):
            # ValueError covers both JSON and Unicode decoding errors.
            return None

    def _get_city(self):
        """Run the initial query and fill ``self.city`` / ``self.total_num``.

        ``self.city`` becomes the response's ``content`` list extended with
        every ``city`` list found under ``more_city``.  If the response is
        missing, unparsable, or its ``content`` is not a list, an error
        message is printed and the process exits.
        """
        data = self._fetch(self.query)
        content = data.get('content') if isinstance(data, dict) else None
        if not isinstance(content, list):
            print('keyworld error.')
            sys.exit()
        self.city = content
        for group in data.get('more_city') or []:
            self.city.extend(group['city'])
        for city in self.city:
            self.total_num += city['num']

    def _get_data(self, city, page=0):
        """Fetch one page of results for ``city`` and return the parsed JSON.

        ``city`` is one entry of ``self.city``; its ``geo`` value is split
        on ``'|'`` and the second field is sent as the ``b`` parameter, its
        ``code`` as ``c``.  ``page`` is sent as ``pn``.  Returns ``None``
        when the response cannot be parsed.
        """
        query = [
            ('addr', '0'),
            ('b', '(%s)' % city['geo'].split('|')[1]),
            ('c', city['code']),
            ('db', '0'),
            ('gr', '3'),
            ('ie', 'utf-8'),
            ('l', '9'),
            ('newmap', '1'),
            ('on_gel', '1'),
            ('pn', page),
            ('qt', 'con'),
            ('src', '7'),
            ('sug', '0'),
            ('t', int(time.time())),
            ('tn', 'B_NORMAL_MAP'),
            ('wd', self.keyword),
            ('wd2', ''),
        ]
        return self._fetch(query)

    def _save(self, content, city):
        """Write every result in ``content`` to the output file.

        One tab-separated line per result: city name, result name, address,
        telephone.  ``addr`` and ``tel`` are optional and written as empty
        fields when absent.  Progress is printed after each line.
        """
        for item in content:
            self.count += 1
            self.count_c += 1
            line = '%s\t%s\t%s\t%s\n' % (
                city['name'], item['name'],
                item.get('addr', ''), item.get('tel', ''))
            self.file.write(line)
            print('(%s/%s) %s[%s/%s]' % (self.count, self.total_num,
                                         city['name'], self.count_c,
                                         city['num']))

    def get(self, city):
        """Fetch and save all result pages for one ``city`` entry.

        Pages without a usable ``content`` list (including responses that
        could not be parsed) are skipped.
        """
        self.count_c = 0
        # Ceiling division that is correct on Python 2 and 3 alike.
        # Assumes 10 results per page - TODO confirm against the service.
        pages = -(-city['num'] // 10)
        for page in range(pages):
            data = self._get_data(city, page)
            content = data.get('content') if isinstance(data, dict) else None
            if isinstance(content, list):
                self._save(content, city)

    def get_all(self):
        """Fetch and save the results of every city, then close the file.

        The output file is closed even when a download fails part-way.
        """
        try:
            for city in self.city:
                self.get(city)
        finally:
            self.file.close()
if __name__ == '__main__':
    # The search keyword is the first command-line argument; a built-in
    # default is used when none is given.
    if len(sys.argv) > 1:
        keyword = sys.argv[1]
    else:
        keyword = '钻石'
    # Constructing BaiduMap already performs the city-discovery query.
    baidumap = BaiduMap(keyword)
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print('_' * 20)
    print('CITY: %s' % len(baidumap.city))
    print('DATA: %s' % baidumap.total_num)
    baidumap.get_all()
Python采集百度地图数据.zip
3星 · 超过75%的资源 需积分: 17 169 浏览量
2018-05-10
17:14:10
上传
评论 5
收藏 2KB ZIP 举报
syf_888
- 粉丝: 16
- 资源: 117
最新资源
- libjpeg 编译所需的 Win32.mak vs编译libjpeg
- 自动驾驶-状态估计和定位-粒子滤波实现和源码.pdf
- 数据可视化-智慧物流服务中心大屏页面.zip
- yolov5,SSD 可能使用到的一些代码
- bbbbbbbbbbbbbbbbbb
- 安卓逆向学习笔记之Frida Stalker 还原OLLVM AES.docx
- 安卓逆向学习笔记之unicorn来trace还原OLLVM Base64.docx
- 基于jquery的自定义表格组件实现
- Nessus最新20240426离线安装插件all-2.0.tar.gz
- 最新版本私钥助记词碰撞器大富豪使用python进行制作通过接口的方式进行验证支持多币种多链多网络一分钟万次验证高出货率
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈