from flask import request,Blueprint,jsonify
import requests
from bs4 import BeautifulSoup
import pymysql as pymysql
from analyze.analyze import createIndex
from hotword.hotword import hotwordCreate
from pojo.Content import Content
import analyze
import re
# Blueprint holding all spider/query routes defined in this module.
xlSpider = Blueprint('xlSpider', __name__)
# Weibo hot-search summary page scraped by the /createSpider endpoint.
url='https://s.weibo.com/top/summary?Refer=top_hot&topnav=1&wvr=6'
def getHTMLText(url, timeout=30):
    """Fetch *url* and return its body as text.

    Raises requests.HTTPError for non-2xx responses and
    requests.Timeout after *timeout* seconds.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Decode with the content-sniffed charset rather than the
    # header-declared one.
    response.encoding = response.apparent_encoding
    return response.text
@xlSpider.route("/createSpider")
def createSpider():
html = getHTMLText(url)
soup = BeautifulSoup(html, 'html.parser')
list=[]
list2=[]
for item in soup.find_all('tr',class_=""): # 查找div的class为item的dom
# print(item) #测试,查看电影item信息
# 保存一部电影信息
dict = {
"url": "",
"content": "",
"score": ""
}
item = str(item) # 转换字符串
# innerHtml=BeautifulSoup(item, 'html.parser')
# ahtml=innerHtml.find_all('a')
# ahtml[0].string
# ahtml[0]['href']
# 正则表达式查找影片链接
obj =re.findall(re.compile(r'<a href="(.*?)" target="_blank">(.*)</a>'), item)
content=Content()
if obj.__len__()>0:
content.url = obj[0][0]
dict.update({
"url":obj[0][0]
})
if obj[0].__len__()>0:
content.content=obj[0][1]
dict.update({
"content": obj[0][1]
})
# content = re.findall(re.compile(r'<a target="_blank">(.*)</a>'), item)[0]
score = re.findall(re.compile(r'<span>(.*)</span>'), item)
if score.__len__()>0:
content.score=score[0]
dict.update({
"score": score[0]
})
list2.append(dict)
list.append(content)
saveToDB(list)
# createIndex(list)
# hotwordCreate(list)
return "爬取创建成功"
@xlSpider.route("/clearData")
def clearData():
conn = getConn()
conn.autocommit(1)
cursor = conn.cursor()
try:
sql = "truncate xlwb_data"
cursor.execute(sql)
except:
import traceback
traceback.print_exc()
# 发生错误时回滚
conn.rollback()
finally:
# 关闭游标连接
cursor.close()
# 关闭数据库连接
conn.close()
return "清空数据成功"
@xlSpider.route("/createIndexDoc")
def createIndexDoc():
conn = getConn()
conn.autocommit(1)
cursor = conn.cursor()
try:
sql = "SELECT distinct * FROM xlwb_data"
cursor.execute(sql)
data = cursor.fetchall()
list = []
for d in data:
content = Content()
content.content=d[1]
content.url=d[2]
content.score=d[3]
list.append(content)
createIndex(list)
hotwordCreate(list)
except:
import traceback
traceback.print_exc()
# 发生错误时回滚
conn.rollback()
finally:
# 关闭游标连接
cursor.close()
# 关闭数据库连接
conn.close()
return "成功"
@xlSpider.route("/getDataFromDB")
def getDataFromDB():
keyword=request.args.get("keyword")
page=int(request.args.get("page"))
limit=int(request.args.get("limit"))
start=(page-1)*limit
conn=getConn()
conn.autocommit(1)
cursor = conn.cursor()
try:
countSQL="select count(id) count from xlwb_data where 1=1 and content like '%{}%'".format(keyword)
cursor.execute(countSQL);
count =cursor.fetchone()
sql="SELECT distinct * FROM xlwb_data WHERE 1=1 and content like '%{}%' limit {},{} ".format(keyword,start,limit)
cursor.execute(sql)
data = cursor.fetchall()
list=[]
for d in data:
dict = {
"id": d[0],
"content": d[1],
"url": d[2],
"score":d[3]
}
list.append(dict)
resp = {
"code": 0,
"msg": "",
"data": list,
"count": count
}
return jsonify(resp)
except:
import traceback
traceback.print_exc()
# 发生错误时回滚
conn.rollback()
finally:
# 关闭游标连接
cursor.close()
# 关闭数据库连接
conn.close()
return
def getConn():
    """Open and return a new connection to the xlwb_spider MySQL database."""
    settings = dict(
        host='127.0.0.1',
        port=3306,
        user='root',
        passwd='123456',
        db='xlwb_spider',
        charset='utf8',
    )
    return pymysql.connect(**settings)
def saveToDB(list):
    """Insert crawled Content items into xlwb_data.

    Skips items with an empty `content`. Failures are logged and rolled
    back; the connection is always closed.

    Note: the parameter keeps its original name `list` (it shadows the
    builtin) to preserve the public signature for keyword callers.
    """
    conn = getConn()
    conn.autocommit(1)
    cursor = conn.cursor()
    try:
        # Parameterized insert replaces the manual escape_string()
        # string-building (escape_string was removed in PyMySQL >= 1.0
        # and the old form was still injection-prone).
        sql = "INSERT INTO xlwb_data(content,url,score) VALUES (%s,%s,%s)"
        for item in list:
            if item.content == "":
                continue  # skip rows where no topic text was parsed
            cursor.execute(sql, (item.content, item.getAbsoloteURL(), item.score))
    except Exception:  # was a bare except
        import traceback
        traceback.print_exc()
        conn.rollback()
    finally:
        cursor.close()
        conn.close()
没有合适的资源?快使用搜索试试~ 我知道了~
资源推荐
资源详情
资源评论
收起资源包目录
毕业设计源码之微博舆情分析系统的设计与实现(python).zip (49个子文件)
项目部署说明.zip 525KB
myProject
项目部署说明.zip 525KB
xlwb_spider
spider
spiderObtain.py 5KB
app.py 601B
analyze
analyze.py 2KB
api
htmlAPI.py 379B
resource
xlwb_spider_xlwb_data.sql 12KB
hotword
hotword.py 2KB
.idea
dataSources.xml 917B
encodings.xml 258B
pojo
Content.py 209B
static
js
table.js 0B
form.js 1KB
lay-config.js 0B
jquery-confirm.min.js 0B
jquery-3.4.1.min.js 0B
api
init.json 839B
upload.json 135B
tableSelect.json 1KB
menus_bak.json 6KB
clear.json 55B
accepttable.json 6KB
table.json 2KB
menus.json 6KB
resource 0B
css
bootstrap-theme.min.css.map 25KB
bootstrap.min.css 118KB
bootstrap-theme.css.map 47KB
bootstrap-theme.css 26KB
public.css 1KB
bootstrap.css.map 380KB
layuimini.css 20KB
themes
default.css 4KB
bootstrap.css 143KB
bootstrap-theme.min.css 23KB
jquery-confirm.min.css 22KB
bootstrap.min.css.map 529KB
images
logo.png 74KB
home.png 146KB
pic.png 1.28MB
bg.jpg 26KB
favicon.ico 4KB
captcha.jpg 2KB
donate_qrcode.png 50KB
fonts
glyphicons-halflings-regular.svg 106KB
glyphicons-halflings-regular.ttf 44KB
glyphicons-halflings-regular.woff 23KB
glyphicons-halflings-regular.eot 20KB
glyphicons-halflings-regular.woff2 18KB
共 49 条
- 1
资源评论
码农落落
- 粉丝: 530
- 资源: 2304
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功