"""Beautiful Soup
Elixir and Tonic
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
Beautiful Soup parses a (possibly invalid) XML or HTML document into a
tree representation. It provides methods and Pythonic idioms that make
it easy to navigate, search, and modify the tree.
A well-formed XML/HTML document yields a well-formed data
structure. An ill-formed XML/HTML document yields a correspondingly
ill-formed data structure. If your document is only locally
well-formed, you can use this library to find and process the
well-formed part of it.
Beautiful Soup works with Python 2.2 and up. It has no external
dependencies, but you'll have more success at converting data to UTF-8
if you also install these three packages:
* chardet, for auto-detecting character encodings
http://chardet.feedparser.org/
* cjkcodecs and iconv_codec, which add more encodings to the ones supported
by stock Python.
http://cjkpython.i18n.org/
Beautiful Soup defines classes for two main parsing strategies:
* BeautifulStoneSoup, for parsing XML, SGML, or your domain-specific
language that kind of looks like XML.
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
or invalid. This class has web browser-like heuristics for
obtaining a sensible parse tree in the face of common HTML errors.
Beautiful Soup also defines a class (UnicodeDammit) for autodetecting
the encoding of an HTML or XML document, and converting it to
Unicode. Much of this code is taken from Mark Pilgrim's Universal Feed Parser.
For more than you ever wanted to know about Beautiful Soup, see the
documentation:
http://www.crummy.com/software/BeautifulSoup/documentation.html
"""
from __future__ import generators
__author__ = "Leonard Richardson (leonardr@segfault.org)"
__version__ = "3.0.4"
__copyright__ = "Copyright (c) 2004-2007 Leonard Richardson"
__license__ = "PSF"
from sgmllib import SGMLParser, SGMLParseError
import codecs
import types
import re
import sgmllib
# htmlentitydefs ships with every standard CPython install, but may be
# absent in trimmed-down environments; fall back to an empty table so
# entity lookups simply find nothing instead of crashing at import time.
try:
    from htmlentitydefs import name2codepoint
except ImportError:
    name2codepoint = {}
#This hack makes Beautiful Soup able to parse XML with namespaces
# (the stock sgmllib tag-name pattern rejects ':' and '.', so namespaced
# tags like <ns:tag> would otherwise not be recognized as tags).
sgmllib.tagfind = re.compile('[a-zA-Z][-_.:a-zA-Z0-9]*')
# Encoding used when serializing output unless the caller overrides it.
DEFAULT_OUTPUT_ENCODING = "utf-8"
# First, the classes that represent markup elements.
class PageElement:
"""Contains the navigational information for some part of the page
(either a tag or a piece of text)"""
def setup(self, parent=None, previous=None):
"""Sets up the initial relations between this element and
other elements."""
self.parent = parent
self.previous = previous
self.next = None
self.previousSibling = None
self.nextSibling = None
if self.parent and self.parent.contents:
self.previousSibling = self.parent.contents[-1]
self.previousSibling.nextSibling = self
def replaceWith(self, replaceWith):
    """Replace this element in the tree with a different element.

    The replacement may already be a sibling of this element; in
    that case indexes are adjusted so it lands exactly where this
    element was."""
    oldParent = self.parent
    myIndex = self.parent.contents.index(self)
    if hasattr(replaceWith, 'parent') and replaceWith.parent == self.parent:
        # We're replacing this element with one of its siblings.
        index = self.parent.contents.index(replaceWith)
        if index < myIndex:
            # The sibling comes before this element. That means that
            # when we extract it, the index of this element will
            # change. (Bug fix: the old guard 'if index and ...'
            # wrongly skipped this adjustment when the sibling was
            # the parent's first child, i.e. index 0.)
            myIndex = myIndex - 1
    self.extract()
    oldParent.insert(myIndex, replaceWith)
def extract(self):
    """Destructively rips this element out of the tree.

    Detaches this element (together with its subtree) from its
    parent, from its siblings, and from the document-order
    previous/next chain, reconnecting the neighbors on either side."""
    if self.parent:
        try:
            self.parent.contents.remove(self)
        except ValueError:
            # Already absent from the parent's child list; nothing to do.
            pass
    #Find the two elements that would be next to each other if
    #this element (and any children) hadn't been parsed. Connect
    #the two.
    lastChild = self._lastRecursiveChild()
    nextElement = lastChild.next
    if self.previous:
        self.previous.next = nextElement
    if nextElement:
        nextElement.previous = self.previous
    # Sever this subtree's own ends of the document-order chain.
    self.previous = None
    lastChild.next = None
    # Clear the parent link and stitch the former siblings together.
    self.parent = None
    if self.previousSibling:
        self.previousSibling.nextSibling = self.nextSibling
    if self.nextSibling:
        self.nextSibling.previousSibling = self.previousSibling
    self.previousSibling = self.nextSibling = None
def _lastRecursiveChild(self):
"Finds the last element beneath this object to be parsed."
lastChild = self
while hasattr(lastChild, 'contents') and lastChild.contents:
lastChild = lastChild.contents[-1]
return lastChild
def insert(self, position, newChild):
    """Insert newChild into this element's contents at the given
    position (clamped to the current length), fixing up every
    navigational link: parent, previous/next siblings, and the
    document-order previous/next chain.

    Plain strings are wrapped in NavigableString objects. If
    newChild already lives somewhere in a tree it is extracted
    first; when it is already a child of this element, the target
    position is adjusted to account for its removal."""
    if (isinstance(newChild, basestring)
        or isinstance(newChild, unicode)) \
        and not isinstance(newChild, NavigableString):
        newChild = NavigableString(newChild)
    # Clamp so an oversized position simply appends.
    position = min(position, len(self.contents))
    if hasattr(newChild, 'parent') and newChild.parent != None:
        # We're 'inserting' an element that's already one
        # of this object's children.
        if newChild.parent == self:
            # Bug fix: the child's position must come from
            # contents.index(); the old code called self.find(),
            # which performs a tag *search* and returns an element,
            # not an index.
            index = self.contents.index(newChild)
            if index < position:
                # We're moving it further down the list of this
                # object's children. That means that when we extract
                # this element, our target index will jump down one.
                # (Bug fix: the old guard 'if index and ...' skipped
                # this adjustment when the child was at index 0.)
                position = position - 1
        newChild.extract()
    newChild.parent = self
    previousChild = None
    if position == 0:
        # Becomes the first child: in document order, the element
        # just before it is this element itself.
        newChild.previousSibling = None
        newChild.previous = self
    else:
        previousChild = self.contents[position-1]
        newChild.previousSibling = previousChild
        newChild.previousSibling.nextSibling = newChild
        # In document order, the element just before newChild is the
        # deepest last descendant of the preceding sibling.
        newChild.previous = previousChild._lastRecursiveChild()
    if newChild.previous:
        newChild.previous.next = newChild
    newChildsLastElement = newChild._lastRecursiveChild()
    if position >= len(self.contents):
        # Appending as the last child: the next element in document
        # order is whatever follows the closest ancestor that has a
        # next sibling (or nothing, at the end of the document).
        newChild.nextSibling = None
        parent = self
        parentsNextSibling = None
        while not parentsNextSibling:
            parentsNextSibling = parent.nextSibling
            parent = parent.parent
            if not parent: # This is the last element in the document.
                break
        if parentsNextSibling:
            newChildsLastElement.next = parentsNextSibling
        else:
            newChildsLastElement.next = None
    else:
        # Inserting before an existing child: splice newChild into
        # both the sibling list and the document-order chain.
        nextChild = self.contents[position]
        newChild.nextSibling = nextChild
        if newChild.nextSibling:
            newChild.nextSibling.previousSibling = newChild
        newChildsLastElement.next = nextChild
    if newChildsLastElement.next:
        newChildsLastElement.next.previous = newChildsLastElement
    self.contents.insert(position, newChild)
def findNext(self, name=None, attrs={}, text=None, **kwargs):
    """Returns the first item matching the given criteria that
    appears after this element in the document."""
    successors = self.findAllNext
    return self._findOne(successors, name, attrs, text, **kwargs)
def findAllNext(self, name=None, attrs={}, text=None, limit=None,
                **kwargs):
    """Returns all items that match the given criteria and appear
    after this Tag in the document, up to the optional limit."""
    return self._findAll(name, attrs, text, limit, self.nextGenerator)
def findNextSibling(self, name=None, attrs={}, text=None, **kw
没有合适的资源?快使用搜索试试~ 我知道了~
openlayers-map
共1830个文件
html:1312个
js:319个
png:57个
5星 · 超过95%的资源 需积分: 17 31 下载量 200 浏览量
2010-10-27
08:06:46
上传
评论
收藏 7.47MB ZIP 举报
温馨提示
openlayers-map openlayers-map openlayers-map openlayers-map
资源推荐
资源详情
资源评论
收起资源包目录
openlayers-map (1830个子文件)
jsmin.c 7KB
library.cfg 2KB
config.cfg 988B
full.cfg 644B
lite.cfg 514B
proxy.cgi 2KB
1.css 19KB
1.css 19KB
style.css 8KB
firebug.css 3KB
style.css 2KB
OL.css 246B
OL.css 246B
2.css 246B
2.css 246B
ie6-style.css 165B
google.css 143B
main.css 44B
main.css 44B
framedCloud.css 0B
customization 1KB
close.gif 1KB
overview_replacement.gif 79B
blank.gif 42B
blank.gif 42B
General7.html 269KB
General3.html 253KB
General4.html 253KB
Functions7.html 245KB
General17.html 233KB
General18.html 229KB
General14.html 204KB
Functions4.html 201KB
Functions13.html 180KB
Map-js.html 180KB
Functions3.html 175KB
General15.html 173KB
General11.html 169KB
General7.html 164KB
Functions14.html 161KB
v1_1_1.html 157KB
Classes.html 155KB
Classes.html 155KB
General12.html 148KB
Functions5.html 148KB
Map-js.html 147KB
General6.html 144KB
General3.html 134KB
General.html 134KB
Util-js.html 125KB
Functions6.html 125KB
GeneralC.html 121KB
General15.html 120KB
Elements-js.html 119KB
General4.html 119KB
Functions.html 114KB
Layer-js.html 109KB
General14.html 106KB
General9.html 106KB
Functions10.html 105KB
General19.html 103KB
General22.html 102KB
v1-js.html 100KB
General6.html 98KB
General5.html 97KB
GeneralO.html 96KB
Properties12.html 96KB
Functions12.html 94KB
General12.html 93KB
General11.html 93KB
KML-js.html 92KB
GeneralG.html 92KB
GeneralD.html 91KB
Vector-js.html 91KB
Functions9.html 91KB
Functions3.html 91KB
Events-js.html 91KB
auto-tests.html 89KB
Functions17.html 89KB
v3.html 89KB
run-tests.html 89KB
XML-js.html 88KB
Properties5.html 88KB
Popup-js.html 88KB
General8.html 88KB
Properties9.html 88KB
SVG-js.html 88KB
Properties2.html 87KB
General10.html 87KB
GeneralP.html 87KB
Constructor.html 86KB
Constructor.html 86KB
Properties14.html 86KB
Layer-js.html 86KB
Constructor2.html 85KB
Constructor2.html 85KB
Properties10.html 85KB
Properties3.html 85KB
Constants.html 85KB
Constants.html 85KB
共 1830 条
- 1
- 2
- 3
- 4
- 5
- 6
- 19
资源评论
- wm9526237932014-05-27还可以,和想要的结果不太一样。
- 竹子_浅浅2014-05-06这个还好吧!对现在很有用
- tom_52015-05-21试试吧,现在急需用
Mstr838Wind
- 粉丝: 1
- 资源: 31
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
- 三次样条插值在C语言如何实现步骤介绍.docx
- SD6084电流模式同步降压转换器固定频率1.5MHz二极管封装SOT23-5
- NokoPrint-wifi蓝牙USB连接打印机[安卓免费App]
- 20211115aMmF9NbS.zip
- 解线性方程组-直接解法:(Gauss)高斯消去法、列主元、全主元 - 北太天元
- MapReduce单词统计 hadoop集群
- 深度学习源码神经网络新闻分类多分类问题ipynb源码带数据集
- 深度学习源码神经网络用预训练的卷积神经网络ipynb源码带数据集
- 深度学习源码神经网络使用词嵌入ipynb源码带数据集
- 深度学习源码神经网络使用LSTM生成文本ipynb源码带数据集
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功