# -*- coding: utf-8 -*-
# @Author: LC
# @Date: 2016-08-15 22:34:08
# @Last modified by: LC
# @Last Modified time: 2017-03-23 17:25:42
# @Email: liangchaowu5@gmail.com
###################################################################################
# Function: simulate actions that a human would perform through the browser GUI, including:
# 1. sign up and sign in
# 2. search keywords and visit target product
# 3. add product to cart
###################################################################################
import time
import random
import requests
import redis
import sys
import string
from user_agent import generate_user_agent
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.proxy import *
# changing the MAC address is useless, so the import below stays disabled
# from scripts.ChangeMacAddress import change_mac_address, generate_mac_address
from get_proxy_and_user_information.ConnectRedis import get_connection
from get_proxy_and_user_information.IgnoreWarnings import ignore_warnings
from get_proxy_and_user_information.GetProxy import get_valid_proxy
from record_product_information.VisitRecord import update_record
class Robot:
def __init__(self, proxy):
    """Start a Firefox webdriver configured with an HTTP proxy and a random user-agent.

    Args:
        proxy (str): proxy in the form of ip:port

    Raises:
        ValueError: if proxy does not split into exactly ip and port, or the
            port is not numeric
    """
    ip, port = proxy.split(':')
    profile = webdriver.FirefoxProfile()
    # 1 selects manual proxy configuration in Firefox
    profile.set_preference("network.proxy.type", 1)
    profile.set_preference("network.proxy.http", ip)
    # the port preference is integer-typed; the str produced by split()
    # would be written as a string preference and not be honoured
    profile.set_preference("network.proxy.http_port", int(port))
    # NOTE(review): only the HTTP proxy is configured, while the pages this
    # class visits are https:// -- network.proxy.ssl / network.proxy.ssl_port
    # are probably needed as well; confirm the proxies support HTTPS first
    # present a randomly generated user-agent instead of the default one
    profile.set_preference("general.useragent.override", generate_user_agent())
    profile.update_preferences()
    self.driver = webdriver.Firefox(firefox_profile=profile)
    print('current proxy: %s' % proxy)
def sign_up(self, sign_up_form, sign_up_url = r'https://www.amazon.com/ap/register?_encoding=UTF8&openid.assoc_handle=usflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.amazon.com%2Fgp%2Fyourstore%2Fhome%3Fie%3DUTF8%26ref_%3Dnav_custrec_newcust'):
    """Fill in and submit the sign-up form, then store the new user.

    The stored record has the form 'email#password#mac'. Changing the MAC
    address is disabled in this file, so the mac field is left empty.

    On any error the error is printed, the driver is closed through
    self.exit_driver() and the process exits with status 0.

    Args:
        sign_up_form (dict): maps the name of each form field to the value to
            type into it; must contain the keys 'email' and 'password'
        sign_up_url (str, optional): url to sign up, a custom url can jump to
            the target url after signing up
    """
    try:
        if not sign_up_form:
            raise ValueError('sign_up_form is empty')
        self.driver.get(sign_up_url)
        for k, v in sign_up_form.items():
            inputElement = self.driver.find_element_by_name(k)
            inputElement.send_keys(v)
            # pause after each field
            time.sleep(5)
        # submit the form through the last field that was filled in
        inputElement.submit()
        # no MAC address is generated any more; the trailing empty field keeps
        # the three-part layout that sign_in() splits on '#'
        user_info = '#'.join([sign_up_form['email'], sign_up_form['password'], ''])
        self.store_registered_user(user_info)
    except Exception as e:
        # format the exception itself: e.message is deprecated and is empty
        # for exceptions that do not call BaseException.__init__
        print('Error while signing up\n%s' % e)
        self.exit_driver()
        sys.exit(0)
def sign_in(self, sign_in_url = r'https://www.amazon.com/ap/signin?_encoding=UTF8&openid.assoc_handle=usflex&openid.claimed_id=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.identity=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0%2Fidentifier_select&openid.mode=checkid_setup&openid.ns=http%3A%2F%2Fspecs.openid.net%2Fauth%2F2.0&openid.ns.pape=http%3A%2F%2Fspecs.openid.net%2Fextensions%2Fpape%2F1.0&openid.pape.max_auth_age=0&openid.return_to=https%3A%2F%2Fwww.amazon.com%2F%3Fref_%3Dnav_signin'):
    """Sign in with a registered user picked at random from redis.

    A user record is a '#'-separated string taken from the redis set
    'china_users': either 'email#password#mac' or 'email#mac'. Two-field
    records are signed in with the default password hard-coded below.

    On any error the error is printed, the driver is closed through
    self.exit_driver() and the process exits with status 0.

    Args:
        sign_in_url (str, optional): url to sign in, a custom url can jump to
            the target url after signing in
    """
    try:
        # randomly get a user from redis
        r = get_connection(DB = 1)
        members = r.srandmember('china_users', 1)
        if not members:
            raise ValueError('no registered user available in redis set china_users')
        info = members[0].split('#')
        if len(info) == 3:
            mail_box, passwd = info[0], info[1]
        elif len(info) == 2:
            mail_box = info[0]
            # NOTE(review): hard-coded credential; consider moving it to configuration
            passwd = 'ScutAmazon1234$'
        else:
            # the record itself is not printed because it may contain a password
            raise ValueError('malformed user record: expected 2 or 3 fields, got %d' % len(info))
        # fill the fields in a fixed order so the form is always submitted
        # from the password field
        sign_in_fields = (('email', mail_box), ('password', passwd))
        self.driver.get(sign_in_url)
        for k, v in sign_in_fields:
            inputElement = self.driver.find_element_by_name(k)
            inputElement.send_keys(v)
            # pause after each field
            time.sleep(5)
        inputElement.submit()
    except Exception as e:
        # format the exception itself: e.message is deprecated and is empty
        # for exceptions that do not call BaseException.__init__
        print('Error while getting a user from redis and signing in\n%s' % e)
        self.exit_driver()
        sys.exit(0)
def simulate_browsing(self, words, asin , possibility, qid = None):
    """Visit the product page of an item, record the page view and add the item to the wish list.

    The target url is built from the ASIN and the search words joined by '+'.
    After loading the page, a 'pv' record is written with update_record and
    the add-to-wishlist button is clicked.

    Only ValueError is handled here (printed, then the process exits with
    status 0); any other exception propagates to the caller.

    Args:
        words (str): words used to search items, separated by whitespace
        asin (str): ASIN of the item
        possibility (float): probability of adding the item to the cart;
            currently unused because the add-to-cart step is disabled
        qid (optional): when truthy, it is included in the target url as the
            qid query value
    """
    joined_words = '+'.join(words.split())
    target_url = (
        'https://www.amazon.com/dp/%s/ie=UTF8&qid=%s&keywords=%s' % (asin, qid, joined_words)
        if qid else
        'https://www.amazon.com/dp/%s/ie=UTF8&keywords=%s' % (asin, joined_words)
    )
    # self.search_keywords(joined_words)
    try:
        self.driver.get(target_url)
        update_record(asin, joined_words, 'pv', number=1)
        time.sleep(10)
        # disabled add-to-cart step, kept for reference:
        # if random.random() < possibility:
        #     self.add_to_cart()
        #     time.sleep(5)
        #     update_record(asin, joined_words, 'cart', number=1)
        #     print '========successfully add item to cart======'
        # add to wish list
        wish_button = self.driver.find_element_by_css_selector('#add-to-wishlist-button-submit')
        wish_button.click()
        time.sleep(15)
        # alert = self.driver.switch_to_alert()  # NoAlertPresentException
    except ValueError as err:
        # NOTE(review): the sibling methods catch Exception and close the
        # driver; here only ValueError is caught and the driver stays open --
        # confirm whether that is intended
        print('Error while visiting %s\n%s' % (target_url, err.message))
        # self.exit_driver()
        sys.exit(0)
def search_keywords(self, words):
    """Type keywords into the search box on the index page of amazon and submit.

    On any error the error is printed, the driver is closed through
    self.exit_driver() and the process exits with status 0.

    Args:
        words (str): words used to search items, separated by space
    """
    try:
        self.driver.get(r'https://www.amazon.com/')
        inputElement = self.driver.find_element_by_name('field-keywords')
        inputElement.send_keys(words)
        inputElement.submit()
    except Exception as e:
        # format the exception itself: e.message is deprecated and is empty
        # for exceptions that do not call BaseException.__init__
        print('Error while searching keywords\n%s' % e)
        self.exit_driver()
        sys.exit(0)
def add_to_cart(self):
    """Click the add-to-cart button of the page currently loaded in the driver.

    On any error the error is printed, the driver is closed through
    self.exit_driver() and the process exits with status 0.
    """
    cart = '#add-to-cart-button'
    try:
        self.driver.find_element_by_css_selector(cart).click()
        print('================successfully add to cart===================')
        time.sleep(5)
    except Exception as e:
        # format the exception itself: e.message is deprecated and is empty
        # for exceptions that do not call BaseException.__init__
        print('Error while adding item to cart\n%s' % e)
        self.exit_driver()
        sys.exit(0)
def generate_sign_up_user(self, random_password = False):
"""ramdomly generate a user to sign up
Args:
random_password (bool, optional): use uniform password or specific pa
没有合适的资源?快使用搜索试试~ 我知道了~
温馨提示
AmazonRobot 是一个用 Python 实现的爬虫程序,通过脚本自动访问 Amazon 上的商品,用于商品引流。主要实现了用户注册、根据给出的搜索词语和商品的 ASIN 号进行搜索并访问商品、按照一定概率将商品加入购物车等功能;同时通过动态修改 UA、维护代理池、控制爬取速率来防止被识别为爬虫。由于需要解析网页的 JS 代码,整个程序主要依靠 selenium 来完成解析。用到的数据库有 Redis 和 MySQL:Redis 主要用于存储代理池和注册所需的用户信息(姓名、电话、地址、visa 卡等);MySQL 用于存储被访问商品的信息(ASIN 号、访问日期、日 pv 量、商品的排名等)。使用前需要先在代码中指定这两个数据库的地址。除 selenium 外,还依赖的第三方库有 redis、MySQLdb、requests、bs4、user_agent;Python 版本为 2.7。
资源推荐
资源详情
资源评论
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![gz](https://img-home.csdnimg.cn/images/20210720083447.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![rar](https://img-home.csdnimg.cn/images/20210720083606.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![zip](https://img-home.csdnimg.cn/images/20210720083736.png)
![7z](https://img-home.csdnimg.cn/images/20210720083312.png)
![rar](https://img-home.csdnimg.cn/images/20210720083606.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
收起资源包目录
![package](https://csdnimg.cn/release/downloadcmsfe/public/img/package.f3fc750b.png)
![folder](https://csdnimg.cn/release/downloadcmsfe/public/img/folder.005fa2e5.png)
![folder](https://csdnimg.cn/release/downloadcmsfe/public/img/folder.005fa2e5.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![folder](https://csdnimg.cn/release/downloadcmsfe/public/img/folder.005fa2e5.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![folder](https://csdnimg.cn/release/downloadcmsfe/public/img/folder.005fa2e5.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
![file-type](https://csdnimg.cn/release/download/static_files/pc/images/minetype/UNKNOWN.png)
共 14 条
- 1
资源评论
![avatar-default](https://csdnimg.cn/release/downloadcmsfe/public/img/lazyLogo2.1882d7f4.png)
![avatar](https://profile-avatar.csdnimg.cn/d5b8e16cddf148da83293d5bb68d9224_admin_maxin.jpg!1)
博士僧小星
- 粉丝: 2050
- 资源: 5919
![benefits](https://csdnimg.cn/release/downloadcmsfe/public/img/vip-rights-1.c8e153b4.png)
下载权益
![privilege](https://csdnimg.cn/release/downloadcmsfe/public/img/vip-rights-2.ec46750a.png)
C知道特权
![article](https://csdnimg.cn/release/downloadcmsfe/public/img/vip-rights-3.fc5e5fb6.png)
VIP文章
![course-privilege](https://csdnimg.cn/release/downloadcmsfe/public/img/vip-rights-4.320a6894.png)
课程特权
![rights](https://csdnimg.cn/release/downloadcmsfe/public/img/vip-rights-icon.fe0226a8.png)
开通VIP
上传资源 快速赚钱
我的内容管理 展开
我的资源 快来上传第一个资源
我的收益
登录查看自己的收益我的积分 登录查看自己的积分
我的C币 登录后查看C币余额
我的收藏
我的下载
下载帮助
![voice](https://csdnimg.cn/release/downloadcmsfe/public/img/voice.245cc511.png)
![center-task](https://csdnimg.cn/release/downloadcmsfe/public/img/center-task.c2eda91a.png)
最新资源
资源上传下载、课程学习等过程中有任何疑问或建议,欢迎提出宝贵意见哦~我们会及时处理!
点击此处反馈
![feedback](https://img-home.csdnimg.cn/images/20220527035711.png)
![feedback](https://img-home.csdnimg.cn/images/20220527035711.png)
![feedback-tip](https://img-home.csdnimg.cn/images/20220527035111.png)
安全验证
文档复制为VIP权益,开通VIP直接复制
![dialog-icon](https://csdnimg.cn/release/downloadcmsfe/public/img/green-success.6a4acb44.png)