写了挺多网站的爬虫,一直想把所有网站整合成一个爬虫程序,不过没这个时间
下面简单展示掘金网站的文章爬取
> 本爬虫会将爬取的内容直接上传到wordpress网站的数据库,所以运行爬虫前请填写好数据库的相关信息
如果不想传到数据库,请自行改动代码,将数据写到本地!
```python
#pytest
#coding=utf-8
#!/usr/bin/python
# IT网站文章爬虫
# 作者:小鸡 flyphp@outlook.com
import pymysql
import time
from bs4 import BeautifulSoup
import requests,sys
import datetime
import re
import json
import urllib
import urllib2
import types
# Database connection: host, user, password, database name.
# NOTE(review): credentials are hard-coded and the connection is opened at
# import time; consider moving to a config file / connect-on-demand.
db = pymysql.connect(host='localhost', user="root", passwd='your passwd', db='www_idealli_com')
# Ad snippet appended to every article body. NOTE(review): get_post() also
# reads a global "ads2" that is never defined in this file -- verify where
# it is supposed to come from.
ads = ""
cur_tag = ''  # name of the tag currently being crawled
today = '2019-02-18 18:09:06'  # fixed fallback timestamp used for posts
urls = []  # queue of article URLs to fetch
tar = []
link_head = "https://juejin.im"  # site base URL used to build absolute links
otar = []
ourls = []
ii = 0  # count of posts successfully inserted this run
it = 0
cur_page_num = 0  # current listing-page index
cur_tag_num = 2  # index of the tag currently being processed
last_post_id = 0  # highest wp_posts.ID written so far; incremented per insert
check_title_time = 1
# url_fo = open("mysql_urls.log", "w")
# car_fo = open("mysql_cars.log", "w")
#################################################
# Article insertion -- database layer
#################################################
def tag_id(tag_name):
    """Look up (or create) the WordPress taxonomy rows for *tag_name*.

    The tag name is deliberately inserted TWICE into ``wp_terms``: the first
    term_id is registered as a ``post_tag`` taxonomy and the second as a
    ``category``, so each crawled tag doubles as a category.

    All queries are parameterized -- tag names come from scraped web pages
    and must be treated as untrusted input.

    Returns the rows of ``term_taxonomy_id`` values (tuple of 1-tuples) for
    both taxonomies, for use by insert_post().
    """
    cursor = db.cursor()
    check_tag = "SELECT `term_id` FROM `wp_terms` WHERE `name` = %s"
    insert_taxonomy = ("INSERT INTO `wp_term_taxonomy` (`term_taxonomy_id`, `term_id`, "
                       "`taxonomy`, `description`, `parent`, `count`) "
                       "VALUES (NULL, %s, %s, '', '0', '1')")
    try:
        cursor.execute(check_tag, (tag_name,))
    except Exception as e:
        print(e)
    first_term = cursor.fetchone()  # renamed: original shadowed the function name
    cursor.fetchall()  # drain any remaining rows
    if first_term is None:
        try:
            print(tag_name)
            insert_tag = ("INSERT INTO `wp_terms` (`term_id`, `name`, `slug`, `term_group`) "
                          "VALUES (NULL, %s, %s, '0')")
            # Insert twice on purpose: first copy -> post_tag, second -> category.
            cursor.execute(insert_tag, (tag_name, tag_name))
            cursor.execute(insert_tag, (tag_name, tag_name))
            cursor.execute(check_tag, (tag_name,))
            tag_data = cursor.fetchall()
            cursor.execute(insert_taxonomy, (str(tag_data[0][0]), 'post_tag'))
            cursor.execute(insert_taxonomy, (str(tag_data[1][0]), 'category'))
            db.commit()
        except Exception as e:
            print(e)
            db.rollback()
    # Re-read both term ids (whether pre-existing or just created).
    try:
        cursor.execute(check_tag, (tag_name,))
    except Exception as e:
        print(e)
    tag_data = cursor.fetchall()
    check_taxo = ("SELECT `term_taxonomy_id` FROM `wp_term_taxonomy` "
                  "WHERE `term_id` = %s OR `term_id` = %s")
    taxo_args = (str(tag_data[0][0]), str(tag_data[1][0]))
    try:
        cursor.execute(check_taxo, taxo_args)
    except Exception as e:
        print(e)
    taxo_data = cursor.fetchall()
    if len(taxo_data) == 0:
        # Terms exist but their taxonomy rows are missing -- repair them.
        cursor.execute(insert_taxonomy, (taxo_args[0], 'post_tag'))
        cursor.execute(insert_taxonomy, (taxo_args[1], 'category'))
        db.commit()
        cursor.execute(check_taxo, taxo_args)
        taxo_data = cursor.fetchall()
    return taxo_data
def insert_post(ti, au, con, tag_idd, tim, db):
    """Insert one scraped article into the WordPress database.

    Parameters:
        ti: post title (str).
        au: author display name (str).
        con: HTML content of the post.
        tag_idd: term_taxonomy rows from tag_id() -- at least two rows,
            row 0 = post_tag taxonomy, row 1 = category taxonomy.
        tim: timestamp string used for post_date/post_modified.
        db: open pymysql connection.

    Returns 1 on success, 0 if the title already exists or any step fails.

    All SQL is parameterized: titles, authors and content come from scraped
    pages (untrusted input).  This also removes the original workaround of
    replacing "'" with "`" in the content, so apostrophes are stored intact,
    and the py2-only ``.encode('utf-8')`` calls that crashed str-concat on
    Python 3.
    """
    title = ti
    cursor = db.cursor()
    # Dedup: only the ID range this crawler writes into is checked.
    cursor.execute("SELECT ID FROM wp_posts WHERE ID > 263100 AND post_title = %s", (title,))
    if cursor.fetchone() is not None:
        return 0
    author = au
    global last_post_id
    global check_title_time
    last_post_id = int(last_post_id) + 1
    content = con
    post_time = tim  # renamed: original local `time` shadowed the time module
    taxo_data = tag_idd
    try:
        # Look up the author; create a stub wp_users row when unknown.
        check_author = "SELECT `ID` FROM `wp_users` WHERE `display_name` LIKE %s"
        cursor = db.cursor()
        cursor.execute(check_author, (author,))
        author_id = cursor.fetchone()
        if author_id is None:
            insert_author = ("INSERT INTO `wp_users` (`ID`, `user_login`, `user_pass`, "
                             "`user_nicename`, `user_email`, `user_url`, `user_registered`, "
                             "`user_activation_key`, `user_status`, `display_name`) "
                             "VALUES (NULL, '', '', '', '', '', "
                             "'0000-00-00 00:00:00.000000', '', '0', %s)")
            try:
                cursor.execute(insert_author, (author,))
                db.commit()
            except Exception as e:
                db.rollback()
            cursor.execute(check_author, (author,))
            author_id = cursor.fetchone()
        # Create the new post row with an explicit ID.
        post_id = str(last_post_id)
        insert_sql = ("INSERT INTO `wp_posts` (`ID`, `post_author`, `post_date`, "
                      "`post_date_gmt`, `post_content`, `post_title`, `post_excerpt`, "
                      "`post_status`, `comment_status`, `ping_status`, `post_password`, "
                      "`post_name`, `to_ping`, `pinged`, `post_modified`, "
                      "`post_modified_gmt`, `post_content_filtered`, `post_parent`, "
                      "`guid`, `menu_order`, `post_type`, `post_mime_type`, "
                      "`comment_count`) VALUES (%s, %s, %s, %s, %s, %s, '', 'publish', "
                      "'open', 'open', '', '', '', '', %s, %s, '', '0', '', '0', "
                      "'post', '', '0')")
        try:
            cursor.execute(insert_sql, (post_id, str(author_id[0]), str(post_time),
                                        str(post_time), str(content), str(title),
                                        str(post_time), str(post_time)))
            # Link the post to both taxonomies and bump their usage counts.
            for taxo_id in (str(taxo_data[0][0]), str(taxo_data[1][0])):
                cursor.execute("INSERT INTO `wp_term_relationships` "
                               "(`object_id`, `term_taxonomy_id`, `term_order`) "
                               "VALUES (%s, %s, '0')", (post_id, taxo_id))
                cursor.execute("UPDATE `wp_term_taxonomy` SET `count` = `count`+1 "
                               "WHERE `wp_term_taxonomy`.`term_taxonomy_id` = %s",
                               (taxo_id,))
            db.commit()
            global ii
            tip = "\tsucceed catch post" + str(title) + "\n\t this is the" + str(ii) + "post and tag " + str(cur_tag_num)
            print(tip)
            print("\thttps://flycode.co/archives/" + str(post_id))
            ii = ii + 1
            return 1
        except Exception as e:
            db.rollback()
            print(e)
            return 0
    except Exception as e:
        db.rollback()
        print(e)
        return 0
#################################################
# 链接获取,爬虫部分
#################################################
def get_post(lin, tim, tag_idd):
    """Fetch one juejin article page and store it via insert_post().

    Parameters:
        lin: absolute URL of the article.
        tim: timestamp string forwarded to insert_post().
        tag_idd: taxonomy rows from tag_id().

    Returns 1 on success, 0 when the post already exists or any step fails,
    and None when the page has no article body.
    """
    link = lin
    requests.adapters.DEFAULT_RETRIES = 5
    global ads
    # BUG FIX: the original declared `global ads2`, but ads2 is never defined
    # at module level, so every successful fetch raised NameError.  Fall back
    # to an empty string when it is absent.
    head_ads = globals().get('ads2', '')
    try:
        print('\n\ttry to get post ' + link)
        req = requests.get(url=link, timeout=10)
        bf = BeautifulSoup(req.text, "html.parser")
        req.close()  # original wrote `req.close` (no call): connection leaked
        texts = bf.find('div', class_='article-content')
        if texts is None:
            print(lin)
            return
        author = bf.find_all('meta', itemprop='name')[0]['content']
        title = bf.find_all('h1', class_='article-title')[0].text
        # Source attribution appended after the article body.  The original
        # encoded this to bytes, which crashes str concatenation on Python 3.
        origin = u"<p>来源:<a target=_blank href="
        # Block hotlink referrers, prepend/append ads, rewrite lazy-load srcs.
        texts = ("<meta name=\"referrer\" content=\"never\">" + head_ads
                 + str(texts).replace('data-src=', 'src=') + ads + origin
                 + str(link) + " rel=noopener>" + str(link) + "</a></p>")
        # tag = bf.find('a', class_='tag').get('data-original-title')
    except Exception as e:
        print(e)
        print("\t######################\n\n\terror!\n\n\t#######################")
        time.sleep(3)
        # BUG FIX: the original fell through here with title/author/texts
        # undefined and relied on a bare except below to mask the NameError.
        return 0
    try:
        if insert_post(title, author, texts, tag_idd, tim, db) == 0:
            print('\t post ' + title + ' have exit !!!')
            return 0
        else:
            return 1
    except Exception as e:
        print(e)
        return 0
def get_url(begin):
# global tar
# global
没有合适的资源?快使用搜索试试~ 我知道了~
温馨提示
软件开发设计:应用软件开发、系统软件开发、移动应用开发、网站开发C++、Java、python、web、C#等语言的项目开发与学习资料 硬件与设备:单片机、EDA、proteus、RTOS、包括计算机硬件、服务器、网络设备、存储设备、移动设备等 操作系统:LInux、树莓派、安卓开发、微机操作系统、网络操作系统、分布式操作系统等。此外,还有嵌入式操作系统、智能操作系统等。 网络与通信:数据传输、信号处理、网络协议、网络与通信硬件、网络安全网络与通信是一个非常广泛的领域,它涉及到计算机科学、电子工程、数学等多个学科的知识。 云计算与大数据:包括云计算平台、大数据分析、人工智能、机器学习等,云计算是一种基于互联网的计算方式,通过这种方式,共享的软硬件资源和信息可以按需提供给计算机和其他设备。
资源推荐
资源详情
资源评论
收起资源包目录
python杂七杂八小项目合集.zip (162个子文件)
mmap_address.bin 8B
mmap_address.bin 8B
mmap_address.bin 8B
mmap_address.bin 8B
test.c 467B
test2.c 283B
error1.c 170B
error2.c 116B
刘金明语法分析器报告.docx 140KB
.gitignore 37B
settings.json 45B
settings.json 44B
settings.json 44B
settings.json 44B
README.md 10KB
README.md 9KB
LICENSE.md 1KB
README.md 1KB
README.md 352B
刘金明语法分析器报告.pdf 234KB
mnist.pkl 52.4MB
params.pkl 3.31MB
deep_convnet_params.pkl 966KB
sample_weight.pkl 178KB
test.png 242KB
lena.png 115KB
lena_gray.png 42KB
cnblog.py 11KB
juejin2.py 11KB
aliyun.py 10KB
juejin3.2.py 9KB
segment.py 9KB
aliyun.1.py 9KB
juejin3.1.py 9KB
layers.py 8KB
multi_layer_net_extend.py 7KB
get_predict_table.py 6KB
lexer.py 6KB
deep_convnet.py 6KB
simple_convnet.py 5KB
multi_layer_net.py 5KB
meizitu.py 4KB
parser.py 4KB
optimizer.py 4KB
generate.py 4KB
mnist.py 3KB
LL.py 3KB
trainer.py 3KB
function.py 3KB
batch_norm_test.py 3KB
hyperparameter_optimization.py 3KB
8_myAtoi.py 3KB
util.py 3KB
two_layer_net.py 2KB
two_layer_net.py 2KB
22_generateParenthesis.py 2KB
csdn.py 2KB
longestPalindrome.py 2KB
get_url.py 2KB
overfit_weight_decay.py 2KB
weight_init_compare.py 2KB
optimizer_compare_mnist.py 2KB
lanzhou.py 2KB
18_fourSum.py 2KB
12_intToRoman.py 2KB
grad_descent.py 2KB
10_isMatch.py 2KB
train_neuralnet.py 2KB
fish_game.py 2KB
13_romanToInt.py 2KB
two_list_sum.py 2KB
misclassified_mnist.py 2KB
apply_filter.py 2KB
gradient_2d.py 2KB
baidu.py 2KB
tianqi.py 2KB
download.py 2KB
16_threeSumClosest.py 1KB
optimizer_compare_naive.py 1KB
19_removeNthFromEnd.py 1KB
overfit_dropout.py 1KB
train_convnet.py 1KB
15_threeSum.py 1KB
train_neuralnet.py 1KB
17_letterCombinations.py 1KB
weight_init_activation_histogram.py 1KB
gradient.py 1KB
gradient.py 1KB
21_mergeTwoLists.py 1KB
csdn_test.py 1KB
11_maxArea.py 1KB
functions.py 1KB
functions.py 1KB
neuralnet_mnist_batch.py 1KB
TwoLayerNet.py 1KB
train_neuralnet2.py 1KB
neuralnet_mnist.py 1KB
two_sum.py 1KB
buy_apple_orange.py 988B
14_longestCommonPrefix.py 986B
共 162 条
- 1
- 2
资源评论
妄北y
- 粉丝: 1w+
- 资源: 1万+
上传资源 快速赚钱
- 我的内容管理 展开
- 我的资源 快来上传第一个资源
- 我的收益 登录查看自己的收益
- 我的积分 登录查看自己的积分
- 我的C币 登录后查看C币余额
- 我的收藏
- 我的下载
- 下载帮助
安全验证
文档复制为VIP权益,开通VIP直接复制
信息提交成功