"""
要想用另一种方式去执行js,而不是使用execjs,
我们可以使用subprocess.run 去执行
node -e "console.log(require('./web.js').myFun(params))"
也可以先node 再
var web = require('./web.js');
web.myFun(video_urls) 即打印最终结果
两种方式的前提条件是:本地环境有node,且需要在js文件中加入 module.exports.myFun = decrypt_m3u8;
封装了平时对execjs的使用
"""
import os
# from node_vm2 import VM, NodeVM
import re
import execjs # pip install PyExecJS
class ExecjsUtils:
    """Compile one JavaScript file with PyExecJS and call functions defined in it.

    The runtime is selected by setting the ``EXECJS_RUNTIME`` environment
    variable to ``"Node"`` before asking PyExecJS for a runtime, so a local
    Node.js installation is required.
    """

    def __init__(self, js_file_path: str, cwd=None):
        """Read and compile the JavaScript source at *js_file_path*.

        Args:
            js_file_path: Path of the UTF-8 encoded JavaScript file to compile.
            cwd: Optional working directory forwarded to the runtime's
                ``compile``. ``None`` (the default) keeps the runtime's own
                default, which is what happened before this argument was
                honoured.
        """
        self.__choice_execjs_runtime()
        with open(js_file_path, 'r', encoding='utf-8') as f:
            source = f.read()
        # Resolve the runtime once and reuse it for both the name and compile.
        runtime = execjs.get()  # with Node.js installed this is "Node.js (V8)"
        print(runtime.name)
        self.__ctx = runtime.compile(source, cwd=cwd)

    def __choice_execjs_runtime(self):
        """Point PyExecJS at the Node runtime via the environment."""
        os.environ['EXECJS_RUNTIME'] = "Node"

    def call(self, *args, **kwargs):
        """Forward the call to the compiled context's ``call`` and return its result."""
        return self.__ctx.call(*args, **kwargs)
import requests
from lxml import etree
from urllib.parse import urlencode
from requests.packages.urllib3.exceptions import InsecureRequestWarning
# Silence urllib3's InsecureRequestWarning; the requests in this file are sent with verify=False.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# Rebind the module name `requests` to one shared Session object. From here on
# `requests.get(...)`, `requests.post(...)` and `requests.cookies` all refer to
# that session, so cookies set on it are sent with every later request.
# NOTE(review): the `requests` module itself is no longer reachable by this name
# below this line.
requests = requests.session()
class Spider:
    """Two-step fetch of ``head_url`` using a cookie computed by page-supplied JavaScript.

    Flow implemented by :meth:`main`:

    1. :meth:`first_req_header` downloads ``head_url`` and pulls a ``<meta>``
       content value plus an inline script out of the HTML, then wraps them
       into a JavaScript snippet.
    2. :meth:`second_req_header` appends that snippet to the local file
       ``rs5_debugger.js``, evaluates the result through ``ExecjsUtils``,
       reads ``document.cookie`` from it, stores the cookie on the shared
       session and requests ``head_url`` again.

    :meth:`mm_req` is an optional third step that asks the same JavaScript
    context for a signed ``MmEwMD`` URL and posts to ``mm_url``.

    The module-level name ``requests`` is expected to be a session object
    (the code relies on ``requests.cookies``).
    """

    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Pragma": "no-cache",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
        "sec-ch-ua": "\"Not_A Brand\";v=\"99\", \"Google Chrome\";v=\"109\", \"Chromium\";v=\"109\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"Windows\""
    }
    head_url = 'https://www.nmpa.gov.cn/'
    # Endpoint signed by the JS helper `get_mme` and posted to by `mm_req`.
    mm_url = 'http://www.fangdi.com.cn/service/freshHouse/queryProjectById.actin'

    def __init__(self):
        # Work on a per-instance copy: second_req_header() updates the headers,
        # and doing that on the class-level dict would leak into every instance.
        self.headers = dict(self.headers)
        # Set by second_req_header(); mm_req() needs it.
        self.utils = None

    @staticmethod
    def _parse_challenge(html: str):
        """Return ``(meta_content, js_code)`` extracted from the first response HTML.

        Raises:
            ValueError: If either pattern is missing from *html*.
        """
        meta_match = re.search(r'<meta content="(.*?)">', html)
        js_match = re.search(r'r="m">(.*?)</s', html)
        if meta_match is None or js_match is None:
            raise ValueError(
                "challenge page does not contain the expected <meta content> / inline script")
        return meta_match.group(1), js_match.group(1)

    @classmethod
    def _build_end_code(cls, meta_content: str, js_code: str) -> str:
        """Build the JavaScript appended after the local head file.

        The snippet defines ``Meta$content``, runs the page's inline script and
        exposes two globals: ``get_cookie`` (returns ``document.cookie``) and
        ``get_mme`` (opens ``mm_url`` through ``XMLHttpRequest.prototype`` and
        returns its ``uri``).
        """
        return f"""
Meta$content = "{meta_content}";
{js_code};;
function get_cookie(){{
return document.cookie;
}}
;;
globalThis.get_cookie = get_cookie;
function get_mme(){{
XMLHttpRequest.prototype.open("GET","{cls.mm_url}",true);
return XMLHttpRequest.prototype.uri;
}}
globalThis.get_mme = get_mme;
"""

    @staticmethod
    def _split_cookie(cookie_str: str):
        """Return ``(name, value)`` for the text after the first ``;`` in *cookie_str*.

        When there is no ``;`` the whole string is used. The pair is split on
        the first ``=`` only, so the value may itself contain ``=``.
        """
        name, value = cookie_str.split(";", 1)[-1].split('=', 1)
        return name.strip(), value.strip()

    @staticmethod
    def _extract_mme(signed_url: str) -> str:
        """Return the token that follows ``MmEwMD=`` in *signed_url*.

        ``str.strip(prefix)`` must not be used for this: it removes any leading
        or trailing characters that occur in the prefix, which can eat into the
        token. If the marker is absent the input is returned unchanged.
        """
        _, marker, token = signed_url.partition("MmEwMD=")
        return token if marker else signed_url

    def first_req_header(self):
        """Fetch ``head_url`` once and return the JavaScript snippet built from it."""
        response = requests.get(self.head_url, headers=self.headers, verify=False)
        print("第一次响应状态;",response.status_code)
        meta_content, js_code = self._parse_challenge(response.text)
        print("re匹配 meta_content:", meta_content)
        print("re匹配自执行js_code:", js_code)
        return self._build_end_code(meta_content, js_code)

    def get_js_utils(self,end_code):
        """Write ``rs5_debugger.js`` + *end_code* to ``rs5_debugger_dev.js`` and compile it.

        Both file names are relative to the current working directory.

        Returns:
            An ``ExecjsUtils`` wrapping the combined script.
        """
        head_file = 'rs5_debugger.js'
        js_file_path = 'rs5_debugger_dev.js'
        with open(head_file, 'r', encoding='utf-8') as r:
            head_code = r.read()
        with open(js_file_path, 'w', encoding='utf-8') as w:
            w.write(head_code + end_code)
        # Compile only after the output file has been closed (fully flushed).
        return ExecjsUtils(js_file_path)

    def second_req_header(self,end_code:str):
        """Compute the cookie from *end_code*, store it on the session and re-request ``head_url``."""
        self.utils = self.get_js_utils(end_code)
        cookie_str: str = self.utils.call("get_cookie")
        cookie_k, cookie_v = self._split_cookie(cookie_str)
        print("最终cookie_t:", len(cookie_v),cookie_k, cookie_v)
        requests.cookies.set(cookie_k, cookie_v)
        self.headers.update(
            {"Referer": "https://www.nmpa.gov.cn/","Sec-Fetch-Site": "same-origin"})
        response = requests.get(self.head_url, headers=self.headers, verify=False)
        response.encoding = 'utf-8'
        print(f"第二次响应状态:{response.status_code}")
        print(f"第二次响应内容:{response.text}")

    def mm_req(self):
        """POST to ``mm_url`` with the ``MmEwMD`` token produced by the JS context.

        Raises:
            RuntimeError: If :meth:`second_req_header` has not built the JS
                context yet.
        """
        if self.utils is None:
            raise RuntimeError("second_req_header() must run before mm_req()")
        signed_url: str = self.utils.call("get_mme")
        mme = self._extract_mme(signed_url)
        print("MmEwMD:",len(mme),mme)
        params = {
            "MmEwMD": mme
        }
        _url = self.mm_url + f'?{urlencode(params)}'
        headers = {
            "Host": "www.fangdi.com.cn",
            "Pragma": "no-cache",
            "Cache-Control": "no-cache",
            "Accept": "application/json, text/javascript, */*; q=0.01",
            "X-Requested-With": "XMLHttpRequest",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 Safari/537.36",
            "Content-Type": "application/x-www-form-urlencoded;charset=UTF-8",
            "Origin": "http://www.fangdi.com.cn",
            "Referer": "http://www.fangdi.com.cn/new_house/new_house_detail.html?project_id=b87cb3d04f878ca6",
            "Accept-Language": "zh-CN,zh;q=0.9",
        }
        _data = {
            "projectID": "b87cb3d04f878ca6"
        }
        data = urlencode(_data)
        response = requests.post(_url, headers=headers, data=data, verify=False)
        print(response.status_code,response.text)

    def main(self):
        """Run the two-request cookie flow against ``head_url``."""
        end_code = self.first_req_header()
        self.second_req_header(end_code)
        # The MmEwMD request is currently disabled.
        # self.mm_req()
# Script entry point: build a Spider and run its main() flow when executed directly.
if __name__ == '__main__':
    Spider().main()
'''
mme: 355 http://www.fangdi.com.cn/service/freshHouse/queryProjectById.actin?MmEwMD=4nh651SnrwUUH9smp9lvSFmFjFs62QChZMeWxc4EAaphPieUuTRPA5pNa0.yRQD5u5yqdmwHptNGrqnc0qy3vw.ENIdddd6ddddedddddIdddduuXbiSXZ8Mwg3QBxRgozu8.Jl4FYqbXmi.Oel_sUT62knBgof3G5K1Nhv4nj6yopFyCYNQdnbVIgRTTBkrQJfojCuaaqPes1s5ss8NJzl.IIdddd6ddddedddddIddddeB_ZjsA8DeGJ7qOG41oD_5wiKodc9SCcxk1jgs8Kvfv
mme: 355 http://www.fangdi.com.cn/service/freshHouse/queryProjectById.actin?MmEwMD=4nh651SnrwUUH9smp9lvSFmFjFs62QChZMeWxc4EAaphPieUuTRPA5pNa0.yRQD5u5yqdmwHptNGrqnc0qy3vw.ENIdddd6ddddedddddIdddduuXbiSXZ8Mwg3QBxRgozu8.Jl4FYqbXmi.Oel_sUT62knBgof3G5K1Nhv4nj6yopFyCYNQdnbVIgRTTBkrQJfojCuaaqPes1s5ss8NJzl.IIdddd6ddddedddddIddddeB_ZjsA8DeGJ7qOG41oD_5wiKodc9SCcxk1jgs8Kvfv
'''