# # -*- coding: utf-8 -*-
#
# import re
#
# import execjs
# import requests
# from ddddocr import DdddOcr
# from pyquery import PyQuery as pq
#
# ocr = DdddOcr()
# with open('yuanrenxue.js', 'r', encoding='utf-8') as f:
# jscode = f.read()
# ctx = execjs.compile(jscode)
#
# headers = {
# "accept": "application/json, text/javascript, */*; q=0.01",
# "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
# "referer": "https://match.yuanrenxue.cn/match/4",
# "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
# "x-requested-with": "XMLHttpRequest"
# }
#
# url = "https://match.yuanrenxue.cn/api/match/4"
# params = {
# "page": "1"
# }
# response = requests.get(url, headers=headers, cookies=None, params=params)
# json_data = response.json()
# items = pq(json_data['info'])('td').items()
# key, value = json_data['key'], json_data['value']
# for item in items:
# imgs = item('img.img_number').items()
# md5_value = ctx.call('yrx_md5', {"key": key, "value": value})
# data = {}
# for i, img in enumerate(imgs):
# img_class = img.attr('class')
# if md5_value in img_class:
# continue
# style = re.findall(r'-?\d+\.?\d*', img.attr('style'))[0]
# b64 = re.sub('data:image/png;base64,', '', img.attr('src')).strip()
# num = ocr.classification(b64)
# data[i] = (style, num)
# print(data)
# -*- coding: utf-8 -*-
import re
import execjs
import requests
from ddddocr import DdddOcr
from pyquery import PyQuery as pq
from loguru import logger
class YuanRenXueMatch(object):
    """Scraper/solver for the yuanrenxue "match 4" challenge API.

    For each of pages 1-5 it downloads the JSON payload, parses the HTML
    fragment in ``info``, OCRs the digit images found in every ``<td>``,
    puts the digits back in order using the numeric offset found in each
    image's ``style`` attribute, and sums the resulting integers.
    """

    # Divisor that converts a style offset into a number of digit slots.
    # NOTE(review): presumably the pixel width of one digit image - confirm
    # against the page's CSS.
    DIGIT_WIDTH = 11.5

    # Seconds to wait for the API before giving up (requests has no default
    # timeout and would otherwise block forever on a stalled connection).
    REQUEST_TIMEOUT = 10

    # First (optionally negative / decimal) number inside a style attribute.
    _OFFSET_RE = re.compile(r'-?\d+\.?\d*')

    # Data-URI prefix stripped from ``src`` before the payload is OCR'd.
    _B64_PREFIX = 'data:image/png;base64,'

    def __init__(self):
        """Create the OCR engine, compile the helper JS and set request data.

        Raises:
            OSError: if ``yuanrenxue.js`` cannot be read from the current
                working directory.
        """
        self.ocr = DdddOcr()
        with open('yuanrenxue.js', 'r', encoding='utf-8') as f:
            self.jscode = f.read()
        self.ctx = execjs.compile(self.jscode)
        self.headers = {
            "accept": "application/json, text/javascript, */*; q=0.01",
            "accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7",
            "referer": "https://match.yuanrenxue.cn/match/4",
            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
            "x-requested-with": "XMLHttpRequest"
        }
        self.url = "https://match.yuanrenxue.cn/api/match/4"

    def rearrange_and_convert(self, data: dict) -> int:
        """Reorder OCR'd digits by their style offset and return the integer.

        Args:
            data: maps the digit's index in document order to a pair
                ``(offset, digit)``, where ``offset`` is a numeric string
                and ``digit`` is the recognised character(s).

        Returns:
            The integer formed by concatenating the digits after each one
            has been moved ``int(offset / DIGIT_WIDTH)`` slots from its
            document index.

        Raises:
            ValueError: if ``data`` is empty or the joined digits are not a
                valid integer literal (e.g. an OCR misread).
        """
        if not data:
            # Same exception type int('') used to raise, with a usable message.
            raise ValueError('no digits to convert')
        slots = {
            index + int(float(entry[0]) / self.DIGIT_WIDTH): entry[1]
            for index, entry in data.items()
        }
        ordered_digits = [digit for _, digit in sorted(slots.items())]
        return int(''.join(ordered_digits))

    def fetch_data(self, page: int) -> dict:
        """Download and decode the JSON payload for one result page.

        Args:
            page: page number, sent as the ``page`` query parameter.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.RequestException: on connection errors, timeouts or a
                non-2xx HTTP status.
        """
        logger.debug('Fetching page: {}', page)
        response = requests.get(
            self.url,
            headers=self.headers,
            params={"page": str(page)},
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail here with the HTTP status rather than later with a confusing
        # JSON decode error on an error page.
        response.raise_for_status()
        return response.json()

    def process_data(self, data: dict):
        """Yield one integer per table cell of a fetched page.

        Args:
            data: decoded page payload; the keys ``info`` (HTML fragment),
                ``key`` and ``value`` are read.

        Yields:
            The number shown in each ``<td>``, rebuilt from its digit images.
            Cells without any usable digit image are skipped with a warning.
        """
        # The marker depends only on the page-level key/value, so compute it
        # once per page instead of once per cell.
        md5_value = self.ctx.call(
            'yrx_md5', {"key": data['key'], "value": data['value']}
        )
        for item in pq(data['info'])('td').items():
            result = {}
            for img in item('img.img_number').items():
                # Images whose class contains the marker are decoys: skip
                # them so they do not take up an index.
                if md5_value in (img.attr('class') or ''):
                    continue
                style = self._OFFSET_RE.findall(img.attr('style'))[0]
                b64 = img.attr('src').replace(self._B64_PREFIX, '').strip()
                # Index counts only the kept images, in document order.
                result[len(result)] = (style, self.ocr.classification(b64))
            if not result:
                logger.warning('Skipping table cell without digit images')
                continue
            num_value = self.rearrange_and_convert(result)
            logger.debug('原始数值:{}', result)
            logger.debug('调整后数值:{}', num_value)
            yield num_value

    def main(self):
        """Fetch pages 1-5, add up every extracted number and log the total.

        Returns:
            The grand total over all five pages.
        """
        total = 0
        for page in range(1, 6):
            total += sum(self.process_data(self.fetch_data(page)))
        logger.debug('5页的全部数字加和:{}', total)
        return total

    @classmethod
    def run(cls):
        """Instantiate the solver and run it; returns what ``main`` returns."""
        # Use cls so that subclasses run their own implementation.
        return cls().main()
if __name__ == "__main__":
    # Run the solver only when executed as a script, not when imported.
    YuanRenXueMatch.run()
JavaScript逆向分析 猿人学 第四题 雪碧图 样式干扰
需积分: 0 43 浏览量
2024-02-29
20:58:30
上传
评论
收藏 4KB ZIP 举报
诗雅颂
- 粉丝: 847
- 资源: 16
最新资源
- 基于matlab实现车牌识别程序,和论文,自己做的,做毕业设计的可以看看 .rar
- Windows系统下安装与配置Neo4j的步骤
- 基于matlab实现潮流计算和最优潮流计算的程序1,对毕业设计有一定用处.rar
- 基于大数据学习资源推荐系统的设计与实现(部署视频)-kaic.mp4
- 哈工大形式语言和自动机2022期末含答案
- Windows系统下安装与配置Neo4j的步骤
- 哈希算法(Hash Algorithm)是一种将任意长度的二进制数据映射为较短的、固定长度的二进制值的函数.txt
- Windows系统下安装与配置Neo4j的步骤
- 在二叉树或更复杂的树形结构中,先序输出叶结点.txt
- 列出所有祖先结点的概念通常与树形结构或图论中的节点相关.txt
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈