# python爬虫爬取启信宝企业数据 (Python crawler that scrapes company data from Qixinbao)
# Retry driver: load the company names that previously failed, query each one
# again, and write back whatever still fails so a later run can resume.
# NOTE(review): read_canmes / write_cnames / search must already be defined
# when these statements execute — confirm the definition order in the file.
cnames = read_canmes(r'未成功企业名.Txt')
filename = r'企业地址启信宝.Txt'
# Two passes, as the original comment intended (range(1, 2) only ran once):
# the names that fail in one pass become the input of the next pass.
for _ in range(2):
    rest = []
    for cname in cnames:
        # search is expected to append names it could not fetch to `rest`
        # (per the original comment; its definition is not shown here).
        search(cname, filename, rest)
    cnames = rest
write_cnames(cnames, r'未成功企业名.Txt')
def read_canmes(filename):
    """Load the distinct company names stored in a CSV file.

    Args:
        filename: Path of a CSV file that has a ``单位名称`` header column.

    Returns:
        list: The distinct values of the ``单位名称`` column, in the order
        they first appear in the file. Missing (blank) cells are skipped.

    Raises:
        KeyError: If the file has no ``单位名称`` column.
    """
    df = pd.read_csv(filename)
    names = df['单位名称']
    # dropna: a blank cell is read as NaN, which is not a usable company name.
    # drop_duplicates keeps the first occurrence in file order, so the result
    # is the same on every run (list(set(...)) ordering varies between runs).
    return names.dropna().drop_duplicates().tolist()
def write_cnames(cnames,filename):
df=pd.DataFrame(cnames,columns=['单位名称'])
df.to_csv(filen