from selenium import webdriver
import json, base64, time
import sys
def send_devtools(driver, cmd, params=None):
    """Send a DevTools command through the driver's HTTP command executor.

    The command is POSTed as JSON (``{"cmd": ..., "params": ...}``) to the
    ``/session/<session_id>/chromium/send_command_and_get_result`` resource
    of the driver's command executor.

    Args:
        driver: WebDriver instance; must expose ``session_id`` and
            ``command_executor``.
        cmd: DevTools command name, e.g. ``"Page.printToPDF"``.
        params: Dict of command parameters. Defaults to an empty dict.

    Returns:
        The ``'value'`` entry of the executor's response, or ``None`` when
        the response has no such key.
    """
    # A fresh dict per call avoids sharing one mutable default across calls.
    if params is None:
        params = {}
    resource = "/session/%s/chromium/send_command_and_get_result" % driver.session_id
    # NOTE(review): `_url` and `_request` are private attributes of the
    # command executor; confirm they exist in the Selenium version in use.
    url = driver.command_executor._url + resource
    body = json.dumps({'cmd': cmd, 'params': params})
    response = driver.command_executor._request('POST', url, body)
    return response.get('value')
def save_as_pdf(driver, path, options=None):
    """Print the driver's current page to a PDF file.

    Issues the ``Page.printToPDF`` DevTools command via ``send_devtools``
    and writes the base64-decoded ``'data'`` field of the result to ``path``.

    Args:
        driver: WebDriver instance with the target page already loaded.
        path: Destination file path; opened in binary write mode.
        options: Dict of ``Page.printToPDF`` parameters. Defaults to an
            empty dict.

    Raises:
        TypeError, KeyError: If the command result is ``None`` or has no
            ``'data'`` entry.
    """
    # A fresh dict per call avoids sharing one mutable default across calls.
    if options is None:
        options = {}
    result = send_devtools(driver, "Page.printToPDF", options)
    # Decode before opening the file so that a failed command does not leave
    # an empty or truncated file behind at `path`.
    pdf_bytes = base64.b64decode(result['data'])
    with open(path, 'wb') as pdf_file:
        pdf_file.write(pdf_bytes)
if __name__ == "__main__":
    # Command-line entry point.
    # Usage: <script> <source_url> <output_pdf>
    #   argv[1] - URL of the page to load
    #   argv[2] - path of the PDF file to write
    print(sys.argv)
    argv = sys.argv
    if len(argv) < 3:
        # Missing arguments are a usage error: report it and exit non-zero
        # instead of silently reporting success.
        sys.exit("usage: %s <source_url> <output_pdf>" % argv[0])
    sourceurl = argv[1]
    pdffilename = argv[2]

    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(sourceurl)
        # Fixed delay: complex, non-static pages need some time to finish
        # loading before they are printed.
        time.sleep(2)
        # Parameters for Page.printToPDF.
        # NOTE(review): per the DevTools protocol docs, paper size and
        # margins are in inches; 11.693 x 16.537 looks like A3 - confirm.
        print_options = {
            'landscape': False,
            'displayHeaderFooter': False,
            'printBackground': True,
            'paperWidth': 11.693,
            'paperHeight': 16.537,
            'marginTop': 0.2,
            'marginBottom': 0.2,
            'marginLeft': 0.2,
            'marginRight': 0.2,
        }
        save_as_pdf(driver, pdffilename, print_options)
    finally:
        # Always shut the browser down, even when loading or printing fails,
        # so no headless Chrome process is left running.
        driver.quit()
# 评论0  (web-page residue, "Comments 0"; not part of the script)