from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import with_metaclass
import types
from . import inputstream
from . import tokenizer
from . import treebuilders
from .treebuilders._base import Marker
from . import utils
from . import constants
from .constants import spaceCharacters, asciiUpper2Lower
from .constants import specialElements
from .constants import headingElements
from .constants import cdataElements, rcdataElements
from .constants import tokenTypes, ReparseException, namespaces
from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements
def parse(doc, treebuilder="etree", encoding=None,
          namespaceHTMLElements=True):
    """Parse a string or file-like object into a tree.

    doc - the string or file-like object to parse
    treebuilder - name handed to treebuilders.getTreeBuilder to pick the
      tree builder class
    encoding - forwarded unchanged to HTMLParser.parse
    namespaceHTMLElements - forwarded unchanged to the HTMLParser constructor

    Returns whatever HTMLParser.parse returns for the document.
    """
    parser = HTMLParser(treebuilders.getTreeBuilder(treebuilder),
                        namespaceHTMLElements=namespaceHTMLElements)
    return parser.parse(doc, encoding=encoding)
def parseFragment(doc, container="div", treebuilder="etree", encoding=None,
                  namespaceHTMLElements=True):
    """Parse a string or file-like object as a fragment.

    doc - the string or file-like object to parse
    container - forwarded unchanged to HTMLParser.parseFragment
    treebuilder - name handed to treebuilders.getTreeBuilder to pick the
      tree builder class
    encoding - forwarded unchanged to HTMLParser.parseFragment
    namespaceHTMLElements - forwarded unchanged to the HTMLParser constructor

    Returns whatever HTMLParser.parseFragment returns.
    """
    parser = HTMLParser(treebuilders.getTreeBuilder(treebuilder),
                        namespaceHTMLElements=namespaceHTMLElements)
    return parser.parseFragment(doc, container=container, encoding=encoding)
def method_decorator_metaclass(function):
    """Build a metaclass that wraps every plain function in a class body.

    function - a callable taking a function and returning its replacement

    The returned metaclass rebinds, in the class dictionary it is given,
    each attribute that is a types.FunctionType instance to
    function(attribute); every other attribute is left as it was.
    """
    class Decorated(type):
        def __new__(meta, classname, bases, classDict):
            # Collect the names first, then rebind in place so the
            # dictionary handed in by the caller is still the one updated.
            function_names = [name for name, value in classDict.items()
                              if isinstance(value, types.FunctionType)]
            for name in function_names:
                classDict[name] = function(classDict[name])
            return type.__new__(meta, classname, bases, classDict)
    return Decorated
class HTMLParser(object):
"""HTML parser. Generates a tree structure from a stream of (possibly
malformed) HTML"""
def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer,
             strict=False, namespaceHTMLElements=True, debug=False):
    """
    strict - raise an exception when a parse error is encountered
    tree - a treebuilder class controlling the type of tree that will be
    returned. Built in treebuilders can be accessed through
    html5lib.treebuilders.getTreeBuilder(treeType); when None, the
    "etree" treebuilder is used
    tokenizer - a class that provides a stream of tokens to the treebuilder.
    This may be replaced for e.g. a sanitizer which converts some tags to
    text
    namespaceHTMLElements - passed to the treebuilder class when it is
    instantiated
    debug - passed to getPhases when the phase classes are looked up
    """
    # Raise an exception on the first error encountered
    self.strict = strict

    builder_class = tree
    if builder_class is None:
        builder_class = treebuilders.getTreeBuilder("etree")
    self.tree = builder_class(namespaceHTMLElements)

    # Only the class is kept here; _parse instantiates it per parse.
    self.tokenizer_class = tokenizer
    self.errors = []

    # One phase instance per phase name, each bound to this parser and
    # its tree.
    self.phases = {phase_name: phase_class(self, self.tree)
                   for phase_name, phase_class in getPhases(debug).items()}
def _parse(self, stream, innerHTML=False, container="div",
           encoding=None, parseMeta=True, useChardet=True, **kwargs):
    """Tokenize stream and run the main loop until it completes.

    stream - input handed to the tokenizer class
    innerHTML - stored as self.innerHTMLMode, which reset() consults
    container - stored as self.container, which reset() consults
    encoding, parseMeta, useChardet, **kwargs - forwarded to the
      tokenizer class, together with parser=self

    Whenever mainLoop raises ReparseException the parser is reset and
    the main loop is run again.
    """
    self.innerHTMLMode = innerHTML
    self.container = container
    self.tokenizer = self.tokenizer_class(stream, encoding=encoding,
                                          parseMeta=parseMeta,
                                          useChardet=useChardet,
                                          parser=self, **kwargs)
    self.reset()

    finished = False
    while not finished:
        try:
            self.mainLoop()
        except ReparseException:
            # Start over from a clean parser state.
            self.reset()
        else:
            finished = True
def reset(self):
    """Return the parser and its tree to their start-of-parse state.

    Resets the tree, clears errors and the debug log, restores
    "no quirks" mode, and selects the starting phase. In innerHTML
    (fragment) mode the lower-cased container name also picks the
    initial tokenizer state.
    """
    self.tree.reset()
    self.firstStartTag = False
    self.errors = []
    self.log = []  # only used with debug mode
    # "quirks" / "limited quirks" / "no quirks"
    self.compatMode = "no quirks"

    if not self.innerHTMLMode:
        self.innerHTML = False
        self.phase = self.phases["initial"]
    else:
        context = self.container.lower()
        self.innerHTML = context

        # NOTE(review): cdataElements selects rcdataState and
        # rcdataElements selects rawtextState; the names look crossed but
        # the pairing is kept exactly as found -- confirm against constants.
        if context in cdataElements:
            self.tokenizer.state = self.tokenizer.rcdataState
        elif context in rcdataElements:
            self.tokenizer.state = self.tokenizer.rawtextState
        elif context == 'plaintext':
            self.tokenizer.state = self.tokenizer.plaintextState
        # Any other container: the tokenizer state is left untouched
        # (state already is data state).

        self.phase = self.phases["beforeHtml"]
        self.phase.insertHtmlElement()
        self.resetInsertionMode()

    self.lastPhase = None
    self.beforeRCDataPhase = None
    self.framesetOK = True
def isHTMLIntegrationPoint(self, element):
    """Return whether element counts as an HTML integration point.

    A MathML "annotation-xml" element qualifies only when it has an
    "encoding" attribute whose ASCII-lowercased value is "text/html" or
    "application/xhtml+xml". Any other element qualifies when its
    (namespace, name) pair is in htmlIntegrationPointElements.
    """
    is_mathml_annotation = (element.name == "annotation-xml" and
                            element.namespace == namespaces["mathml"])
    if not is_mathml_annotation:
        return (element.namespace, element.name) in htmlIntegrationPointElements

    if "encoding" not in element.attributes:
        return False
    lowered = element.attributes["encoding"].translate(asciiUpper2Lower)
    return lowered in ("text/html", "application/xhtml+xml")
def isMathMLTextIntegrationPoint(self, element):
    """Return whether element's (namespace, name) pair is listed in
    mathmlTextIntegrationPointElements."""
    qualified_name = (element.namespace, element.name)
    return qualified_name in mathmlTextIntegrationPointElements
def mainLoop(self):
CharactersToken = tokenTypes["Characters"]
SpaceCharactersToken = tokenTypes["SpaceCharacters"]
StartTagToken = tokenTypes["StartTag"]
EndTagToken = tokenTypes["EndTag"]
CommentToken = tokenTypes["Comment"]
DoctypeToken = tokenTypes["Doctype"]
ParseErrorToken = tokenTypes["ParseError"]
for token in self.normalizedTokens():
new_token = token
while new_token is not None:
currentNode = self.tree.openElements[-1] if self.tree.openElements else None
currentNodeNamespace = currentNode.namespace if currentNode else None
currentNodeName = currentNode.name if currentNode else None
type = new_token["type"]
if type == ParseErrorToken:
self.parseError(new_token["data"], new_token.get("datavars", {}))
new_token = None
else:
if (len(self.tree.openElements) == 0 or
currentNodeNamespace == self.tree.defaultNamespace or
(self.isMathMLTextIntegrationPoint(currentNode) and
((type == StartTagToken and
token["name"] not in frozenset(["mglyph", "malignmark"])) or
type in (CharactersToken, SpaceCharactersToken))) or
(currentNodeNamespace == namespaces["mathml"] and
currentNodeName == "annotation-xml" and
token["name"] == "svg") or
(self.isHTMLIntegrationPoint(currentNode) and
type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
phase = self.phase
else:
phase = self.phases["inForeignContent"]
if type == CharactersToken:
new_token = phase.processCharacters(new_token)
elif type == SpaceCharactersToken:
new_token = phase.processSpaceCharacters(new_token)
elif type == StartTagToken:
new_token = phase.processStartTag(new_token)
elif type == EndTagToken:
new_token = phase.processEndTag(new_token)
elif type == CommentToken:
new_token = phase.processComment(new_token)
elif type == DoctypeToken:
new_token = phase.processDoctype(new_token)
if (type == StartTagToken and token["selfClosing"]
and not t
没有合适的资源?快使用搜索试试~ 我知道了~
温馨提示
【资源说明】 基于Python开发的学生管理系统的设计与实现+详细文档+全部资料(高分毕业设计).zip基于Python开发的学生管理系统的设计与实现+详细文档+全部资料(高分毕业设计).zip 【备注】 1、该资源内项目代码都经过测试运行成功,功能ok的情况下才上传的,请放心下载使用! 2、本项目适合计算机相关专业(如软件工程、计科、人工智能、通信工程、自动化、电子信息等)的在校学生、老师或者企业员工下载使用,也可作为毕设项目、课程设计、作业、项目初期立项演示等,当然也适合小白学习进阶。 3、如果基础还行,可以在此代码基础上进行修改,以实现其他功能,也可直接用于毕设、课设、作业等。 欢迎下载,沟通交流,互相学习,共同进步!
资源推荐
资源详情
资源评论
收起资源包目录
基于Python开发的学生管理系统的设计与实现+详细文档+全部资料(高分毕业设计).zip (2000个子文件)
python2.7 22B
distutils.cfg 228B
base.css 16KB
widgets.css 10KB
forms.css 8KB
changelists.css 6KB
rtl.css 4KB
login.css 1KB
ol3.css 657B
fonts.css 423B
dashboard.css 412B
encodings 28B
gui-64.exe 74KB
cli-64.exe 73KB
cli-arm-32.exe 68KB
gui-arm-32.exe 68KB
cli-32.exe 64KB
cli.exe 64KB
gui-32.exe 64KB
gui.exe 64KB
tabular.html 4KB
change_form.html 3KB
base.html 3KB
change_list.html 3KB
index.html 3KB
delete_confirmation.html 2KB
change_password.html 2KB
delete_selected_confirmation.html 2KB
stacked.html 2KB
password_change_form.html 2KB
openlayers.html 2KB
login.html 2KB
openlayers.html 2KB
model_detail.html 2KB
fieldset.html 2KB
template_filter_index.html 2KB
template_tag_index.html 2KB
view_index.html 2KB
change_list_results.html 2KB
object_history.html 1KB
related_widget_wrapper.html 1KB
related_widget_wrapper.html 1KB
password_reset_confirm.html 1KB
bookmarklets.html 1KB
model_index.html 1KB
index.html 1KB
search_form.html 1KB
actions.html 1012B
template_detail.html 995B
password_reset_form.html 970B
view_detail.html 896B
submit_line.html 790B
missing_docutils.html 734B
password_change_done.html 671B
password_reset_done.html 669B
password_reset_email.html 582B
pagination.html 554B
500.html 527B
clearable_file_input.html 518B
index.html 509B
password_reset_complete.html 505B
invalid_setup.html 437B
clearable_file_input.html 418B
clearable_file_input.html 418B
multiple_input.html 391B
app_index.html 385B
select.html 384B
logged_out.html 374B
date_hierarchy.html 372B
select.html 365B
multiple_input.html 360B
popup_response.html 358B
foreign_key_raw_id.html 346B
filter.html 330B
openlayers-osm.html 321B
base_site.html 316B
add_form.html 310B
404.html 268B
prepopulated_fields_js.html 245B
split_datetime.html 240B
url.html 226B
input_option.html 205B
input_option.html 205B
object_delete_summary.html 188B
read_only_password_hash.html 177B
input.html 174B
input.html 174B
attrs.html 172B
attrs.html 155B
textarea.html 145B
textarea.html 145B
osm.html 111B
select_option.html 110B
select_option.html 110B
multiwidget.html 84B
multiwidget.html 84B
checkbox_select.html 57B
radio.html 57B
checkbox_select.html 57B
radio.html 57B
共 2000 条
- 1
- 2
- 3
- 4
- 5
- 6
- 20
资源评论
不走小道
- 粉丝: 3243
- 资源: 5112
下载权益
C知道特权
VIP文章
课程特权
开通VIP
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
最新资源
- java-leetcode题解之第111题二叉树的最小深度.zip
- java-leetcode题解之第110题平衡二叉树.zip
- java-leetcode题解之第109题有序链表转换二叉搜索树.zip
- java-leetcode题解之第108题将有序数组转换为二叉搜索树.zip
- java-leetcode题解之第107题二叉树的层序遍历II.zip
- java-leetcode题解之第102题二叉树的层序遍历.zip
- java-leetcode题解之第103题二叉树的锯齿形层序遍历.zip
- java-leetcode题解之第104题二叉树的最大深度.zip
- java-leetcode题解之第173题二叉搜索树迭代器.zip
- java-leetcode题解之第100题相同的树.zip
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功