#coding=utf-8
#python爬取csdn博客 -- 基于selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
import sys
import requests
import re
class CSDN:
    """Log in to CSDN with a Selenium-driven Chrome and scrape a blog article.

    Typical use is ``CSDN().start()``, which logs in and then prints the
    title information of one article.

    Attributes set by ``login``:
        headers: dict of HTTP headers (including the session cookies) that
            ``myBlog`` sends with its ``requests`` call.
        data: HTML source of the page the browser showed after the login
            button was clicked.
    """

    # Login page; the ``service`` query parameter names the post-login target.
    LOGIN_URL = "https://passport.csdn.net/?service=http://write.blog.csdn.net/"
    # Article fetched by ``myBlog`` when no URL is supplied.
    ARTICLE_URL = "http://blog.csdn.net/liuxuefeng12/article/details/49904299"
    # Seconds to wait for the article request before giving up.
    REQUEST_TIMEOUT = 10
    # Captures (<title> text, text of the first link inside an <h1>).
    # Compiled once at class creation instead of on every ``myBlog`` call.
    _TITLE_PATTERN = re.compile(
        r'<title>(.*?)</title>.*?<h1>.*?<a href=.*?>(.*?)</a>.*?</h1>', re.S
    )

    @staticmethod
    def _cookie_header(cookies):
        """Return a ``Cookie`` header value built from Selenium cookie dicts.

        Args:
            cookies: iterable of dicts with at least ``name`` and ``value``
                keys, as returned by ``driver.get_cookies()``.

        Returns:
            ``"name=value; name2=value2"``; an empty string for no cookies.
        """
        return "; ".join(
            "{}={}".format(cookie["name"], cookie["value"]) for cookie in cookies
        )

    @classmethod
    def _extract_titles(cls, html):
        """Return a list of ``(page_title, heading_link_text)`` tuples from *html*."""
        return cls._TITLE_PATTERN.findall(html)

    def login(self, username="手机号", password="密码"):
        """Log in through the browser and store the session headers.

        Args:
            username: account name typed into the ``username`` field. The
                default is the placeholder text the script shipped with.
            password: password typed into the ``password`` field. The default
                is likewise a placeholder.

        Side effects:
            Sets ``self.headers`` and ``self.data``. The browser is always
            closed before returning, even when a step raises.
        """
        driver = webdriver.Chrome()
        try:
            driver.get(self.LOGIN_URL)
            # The generic find_element(by, value) form is used because the
            # find_element_by_* shortcuts no longer exist in current Selenium.
            driver.find_element("name", "username").send_keys(username)
            driver.find_element("name", "password").send_keys(password)
            driver.find_element("xpath", "//input[@type='button']").click()
            # NOTE(review): cookies are read immediately after the click; if
            # the login request has not completed yet they may not include
            # the session cookie. Consider an explicit wait here.
            cookie = self._cookie_header(driver.get_cookies())
            page_source = driver.page_source
        finally:
            # Release the browser process; it was previously left running.
            driver.quit()

        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.22 Safari/537.36 SE 2.X MetaSr 1.0',
            'Referer': 'http://write.blog.csdn.net/postlist',
            'Host': 'blog.csdn.net',
            'Cookie': cookie,
        }
        self.data = page_source

    def myBlog(self, url=None, timeout=None):
        """Fetch an article page and print its title and heading link text.

        ``login`` must have been called first so that ``self.headers`` exists.

        Args:
            url: article URL to fetch; defaults to ``ARTICLE_URL``.
            timeout: request timeout in seconds; defaults to
                ``REQUEST_TIMEOUT``.
        """
        if url is None:
            url = self.ARTICLE_URL
        if timeout is None:
            timeout = self.REQUEST_TIMEOUT
        response = requests.get(url, headers=self.headers, timeout=timeout)
        for item in self._extract_titles(response.text):
            print(item[0], item[1])

    def start(self):
        """Run the full flow: log in, then scrape the default article."""
        self.login()
        self.myBlog()
# Run the scraper only when executed as a script, so that importing this
# module does not launch a browser or perform network requests.
if __name__ == "__main__":
    csdn = CSDN()
    csdn.start()