import requests
from bs4 import BeautifulSoup, NavigableString, Tag
from fake_useragent import UserAgent
BASE_URL = "https://ww1.gogoanime2.org"
def search_scraper(anime_name: str) -> list:
    """Search the site for an anime and return the matching entries.

    >>> type(search_scraper("demon_slayer"))
    <class 'list'>

    Args:
        anime_name (str): Name of the anime to search for.

    Raises:
        requests.HTTPError: If the search page answers with an error status.
        ValueError: If the page has no ``<ul class="items">`` result list.

    Returns:
        list: One ``{"title": ..., "url": ...}`` dict per result, taken from
        the ``title`` and ``href`` attributes of each result's first link.
    """
    search_url = f"{BASE_URL}/search/{anime_name}"
    # The header must be spelled "User-Agent"; any other key is sent as an
    # unrelated custom header and the default client agent stays in place.
    response = requests.get(
        search_url, headers={"User-Agent": UserAgent().chrome}, timeout=10
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    anime_ul = soup.find("ul", {"class": "items"})
    if anime_ul is None or isinstance(anime_ul, NavigableString):
        msg = f"Could not find any anime with name {anime_name}"
        raise ValueError(msg)

    anime_list = []
    for anime in anime_ul.children:
        # Children also include whitespace text nodes; only tags are results.
        if not isinstance(anime, Tag):
            continue
        # Title and URL both live on the same first <a> of the entry.
        anchor = anime.find("a")
        if anchor is None or isinstance(anchor, NavigableString):
            continue
        anime_list.append({"title": anchor["title"], "url": anchor["href"]})
    return anime_list
def search_anime_episode_list(episode_endpoint: str) -> list:
    """Scrape an anime page and return its list of episodes.

    >>> type(search_anime_episode_list("/anime/kimetsu-no-yaiba"))
    <class 'list'>

    Args:
        episode_endpoint (str): Path of the anime page, appended to BASE_URL.

    Raises:
        requests.HTTPError: If the page answers with an error status.
        ValueError: If the page has no ``<ul id="episode_related">`` list.

    Returns:
        list: One ``{"title": ..., "url": ...}`` dict per episode; the title
        is the text of the entry's ``<div class="name">`` with spaces removed.
    """
    request_url = f"{BASE_URL}{episode_endpoint}"
    # The header must be spelled "User-Agent"; any other key is sent as an
    # unrelated custom header and the default client agent stays in place.
    response = requests.get(
        url=request_url, headers={"User-Agent": UserAgent().chrome}, timeout=10
    )
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    episode_page_ul = soup.find("ul", {"id": "episode_related"})
    if episode_page_ul is None or isinstance(episode_page_ul, NavigableString):
        # Report the endpoint that was requested; this function has no
        # anime name of its own to refer to.
        msg = f"Could not find any anime episodes with name {episode_endpoint}"
        raise ValueError(msg)

    episode_list = []
    for episode in episode_page_ul.children:
        # Children also include whitespace text nodes; only tags are entries.
        if not isinstance(episode, Tag):
            continue
        url = episode.find("a")
        if url is None or isinstance(url, NavigableString):
            continue
        title = episode.find("div", {"class": "name"})
        if title is None or isinstance(title, NavigableString):
            continue
        episode_list.append(
            {"title": title.text.replace(" ", ""), "url": url["href"]}
        )
    return episode_list
def get_anime_episode(episode_endpoint: str) -> list:
    """Resolve the watch URL and the download URL of a single episode.

    >>> type(get_anime_episode("/watch/kimetsu-no-yaiba/1"))
    <class 'list'>

    Args:
        episode_endpoint (str): Path of the episode page, appended to BASE_URL.

    Raises:
        requests.HTTPError: If the page answers with an error status.
        RuntimeError: If the ``<iframe id="playerframe">`` is missing or its
            ``src`` attribute is not a plain string.

    Returns:
        list: ``[watch_url, download_url]``. The download URL is the player
        path with "/embed/" swapped for "/playlist/" and ".m3u8" appended.
    """
    page = requests.get(
        url=f"{BASE_URL}{episode_endpoint}",
        headers={"User-Agent": UserAgent().chrome},
        timeout=10,
    )
    page.raise_for_status()

    player_frame = BeautifulSoup(page.text, "html.parser").find(
        "iframe", {"id": "playerframe"}
    )
    if player_frame is None or isinstance(player_frame, NavigableString):
        msg = f"Could not find url and download url from {episode_endpoint}"
        raise RuntimeError(msg)

    embed_path = player_frame["src"]
    if not isinstance(embed_path, str):
        msg = f"Could not find url and download url from {episode_endpoint}"
        raise RuntimeError(msg)

    playlist_path = embed_path.replace("/embed/", "/playlist/") + ".m3u8"
    return [f"{BASE_URL}{embed_path}", f"{BASE_URL}{playlist_path}"]
# Interactive flow: search for an anime, pick a result, pick an episode, then
# print the watch and download links for it.
if __name__ == "__main__":
    anime_name = input("Enter anime name: ").strip()
    anime_list = search_scraper(anime_name)
    print("\n")

    if not anime_list:
        print("No anime found with this name")
    else:
        print(f"Found {len(anime_list)} results: ")
        for i, anime in enumerate(anime_list, start=1):
            print(f"{i}. {anime['title']}")

        anime_choice = int(input("\nPlease choose from the following list: ").strip())
        # The menu is 1-based. Without this check, 0 or a negative number
        # would wrap around and silently pick an entry from the end.
        if not 1 <= anime_choice <= len(anime_list):
            raise SystemExit(f"Invalid choice: {anime_choice}")
        chosen_anime = anime_list[anime_choice - 1]
        print(f"You chose {chosen_anime['title']}. Searching for episodes...")

        episode_list = search_anime_episode_list(chosen_anime["url"])
        if not episode_list:
            print("No episode found for this anime")
        else:
            print(f"Found {len(episode_list)} results: ")
            for i, episode in enumerate(episode_list, start=1):
                print(f"{i}. {episode['title']}")

            episode_choice = int(input("\nChoose an episode by serial no: ").strip())
            # Same 1-based range check as for the anime menu above.
            if not 1 <= episode_choice <= len(episode_list):
                raise SystemExit(f"Invalid choice: {episode_choice}")
            chosen_episode = episode_list[episode_choice - 1]
            print(f"You chose {chosen_episode['title']}. Searching...")

            episode_url, download_url = get_anime_episode(chosen_episode["url"])
            print(f"\nTo watch, ctrl+click on {episode_url}.")
            print(f"To download, ctrl+click on {download_url}.")
python-web-programming.rar
需积分: 0 181 浏览量
更新于2024-06-22
收藏 30KB RAR 举报
Python Web编程是一个广泛的领域,它涵盖了使用Python语言构建网络应用程序的所有方面。Python因其简洁明了的语法和丰富的库支持而成为Web开发的热门选择。"python-web-programming.rar"这个压缩包中,很可能包含了一系列与Python Web编程相关的教程、代码示例或者项目资源。
在Python Web开发中,有几个关键的知识点是必须要掌握的:
1. **基础概念**:理解Web开发的基本原理,包括HTTP协议、请求和响应模型,以及客户端和服务器之间的交互。
2. **Web框架**:Python有许多流行的Web框架,如Django、Flask、Tornado等。Django是一个功能齐全的框架,采用MTV(Model-Template-View)模式(可视为MVC的一种变体),提供了ORM(对象关系映射)和强大的管理界面;Flask则以轻量级著称,允许开发者灵活构建应用;Tornado是一个异步网络库,适合高性能实时Web服务。
3. **模板引擎**:在Web开发中,模板引擎用于动态生成HTML页面。比如Django的Django Templates和Flask的Jinja2,它们提供了一种方式将数据和HTML结构分离,让开发者可以专注于逻辑而不必关心HTML细节。
4. **数据库操作**:Python可以通过标准库sqlite3以及各类第三方驱动访问SQLite、MySQL、PostgreSQL等数据库;第三方库SQLAlchemy是一个SQL工具包和ORM,可以简化数据库操作。
5. **表单处理和验证**:在Web应用中,用户输入的处理至关重要。通常会使用框架内置的表单类来创建表单,如Django的Form和Flask的WTForms,同时需要进行数据验证以确保输入的安全性。
6. **路由和URL映射**:Web框架允许开发者定义URL模式并关联到特定的视图函数,如Flask的`@app.route()`装饰器或Django的URLConf。
7. **会话管理**:保持用户状态通常需要用到会话管理,Python Web框架通常提供内置的会话中间件来处理这个问题。
8. **错误处理和日志记录**:良好的错误处理和日志记录是任何Web应用的关键部分,它们可以帮助开发者调试问题和监控应用运行情况。
9. **部署和运维**:了解如何将Python Web应用部署到生产环境,如使用Apache、Nginx等服务器,以及如何进行性能调优、安全配置等运维工作。
10. **RESTful API设计**:随着前后端分离的趋势,理解RESTful API的设计原则和实现也变得重要。这包括HTTP方法的正确使用(GET、POST、PUT、DELETE等)、状态码的设置、JSON数据格式等。
压缩包中的"web_programming"可能包含了上述知识点的实例、代码片段或完整的项目,通过学习这些内容,你可以深入理解Python Web编程,并具备实际开发能力。无论你是初学者还是有经验的开发者,都能从中获益。记得结合实际练习,理论知识和实践相结合,才能更好地掌握Python Web编程。