selenium设置proxy、...
本文介绍了selenium设置proxy、headers的方法,把phantomjs、Chrome、Firefox几个浏览器的设置方法都总结一下,分享给大家,也给自己留个笔记。
phantomjs
设置ip
⽅法1:
# Command-line switches handed to the PhantomJS binary.
# `ip_html` is expected to hold the proxy as "IP:port" (e.g. 192.168.0.28:808);
# it is not defined in this snippet.
# NOTE(review): webdriver.PhantomJS is deprecated in recent Selenium 3.x and
# absent from Selenium 4 -- confirm the Selenium version this targets.
service_args = [
    '--proxy=%s' % ip_html,       # proxy IP:port
    '--proxy-type=http',          # proxy type: http/https
    '--load-images=no',           # do not load images (optional)
    '--disk-cache=yes',           # enable the disk cache (optional)
    '--ignore-ssl-errors=true',   # ignore HTTPS errors (optional)
]
driver = webdriver.PhantomJS(service_args=service_args)
⽅法2:
# Start PhantomJS, then re-open the session with a manual HTTP proxy attached.
browser = webdriver.PhantomJS(PATH_PHANTOMJS)
# Put the proxy settings into DesiredCapabilities and start a new session;
# in effect this is like clearing the browser state and revisiting the URL
# through the proxy.
proxy = webdriver.Proxy()
proxy.proxy_type = ProxyType.MANUAL
proxy.http_proxy = '1.9.171.51:800'
# Add the proxy settings to webdriver.DesiredCapabilities.PHANTOMJS.
proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
browser.start_session(webdriver.DesiredCapabilities.PHANTOMJS)
# The page fetched here is an "what is my IP" page, so the output shows
# whether the proxy is in effect.
browser.get('http://1212.ip138.com/ic.asp')
print('1: ', browser.session_id)
print('2: ', browser.page_source)
print('3: ', browser.get_cookies())
还原为系统代理
# Switch back to a direct connection (no proxy) by starting a new session
# whose capabilities carry ProxyType.DIRECT.
proxy = webdriver.Proxy()
proxy.proxy_type = ProxyType.DIRECT
proxy.add_to_capabilities(webdriver.DesiredCapabilities.PHANTOMJS)
browser.start_session(webdriver.DesiredCapabilities.PHANTOMJS)
browser.get('http://1212.ip138.com/ic.asp')
设置请求头
⽅法2
import json
import random

import requests
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.proxy import ProxyType


def proxies():
    """Fetch one proxy from the proxy service and return it as "ip:port".

    The service response is parsed as JSON and its 'ip' field is combined
    with the fixed port 8907. The result is also printed.

    Returns:
        str: the proxy address in "ip:8907" form.

    Raises:
        requests.RequestException: if the HTTP request fails or times out.
        ValueError: if the response body is not valid JSON.
        KeyError: if the JSON object has no 'ip' field.
    """
    # A timeout keeps a dead proxy service from hanging the script forever.
    r = requests.get(
        "http://120.26.166.214:9840/JProxy/update/proxy/scoreproxy",
        timeout=10,
    )
    rr = json.loads(r.text)
    hh = rr['ip'] + ":" + "8907"
    print(hh)
    return hh


# Obtain one proxy address up front; the snippets below reuse it.
ips = proxies()
# PhantomJS: request headers and proxy, approach 1
# -------------------------------------------------------------------------------------
# Proxy and other switches are passed on the PhantomJS command line.
service_args = [
    '--proxy=%s' % ips,           # proxy IP:port (e.g. 192.168.0.28:808)
    '--ssl-protocol=any',         # accept any SSL/TLS protocol version
    '--load-images=no',           # do not load images (optional)
    '--disk-cache=yes',           # enable the disk cache (optional)
    '--ignore-ssl-errors=true',   # ignore HTTPS errors (optional)
]
# The User-Agent header is set through the PhantomJS page settings capability.
user_agent = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) " +
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.57 Safari/537.36"
)
# Copy the default capabilities so the shared class-level dict is not mutated.
dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = user_agent
# executable_path must name the phantomjs binary itself. A raw string cannot
# end in a backslash, so a bare directory with a trailing "\" does not parse.
# NOTE(review): adjust the path to the local PhantomJS install.
driver = webdriver.PhantomJS(
    executable_path=r"C:\soft\phantomjs-2.1.1-windows\bin\phantomjs.exe",
    desired_capabilities=dcap,
    service_args=service_args,
)
driver.get(url='https://www.baidu.com')
page = driver.page_source
print(page)
# PhantomJS: request headers and proxy, approach 2
# -------------------------------------------------------------------------------------
desired_capabilities = DesiredCapabilities.PHANTOMJS.copy()
# Pick a random User-Agent to disguise the browser.
# NOTE: '请求头池' is a placeholder for a list of User-Agent strings. As
# written, random.choice() returns a single character of this literal.
desired_capabilities["phantomjs.page.settings.userAgent"] = random.choice('请求头池')
# Skipping images makes page crawling much faster.
desired_capabilities["phantomjs.page.settings.loadImages"] = False
# Put the proxy settings into DesiredCapabilities and open a new session;
# in effect this is like clearing the browser state and revisiting the URL
# through the proxy.
proxy = webdriver.Proxy()
proxy.proxy_type = ProxyType.MANUAL
# NOTE: 'ip池' is likewise a placeholder for a list of "host:port" strings.
proxy.http_proxy = random.choice('ip池')
proxy.add_to_capabilities(desired_capabilities)
# Must name the phantomjs binary; a raw string cannot end in a backslash.
# NOTE(review): adjust the path to the local PhantomJS install.
phantomjs_driver = r'C:\phantomjs-2.1.1-windows\bin\phantomjs.exe'
# Launch PhantomJS with the configured capabilities.
driver = webdriver.PhantomJS(
    executable_path=phantomjs_driver,
    desired_capabilities=desired_capabilities,
)
driver.start_session(desired_capabilities)
driver.get(url='https://www.baidu.com')
page = driver.page_source
print(page)
# Implicit wait of 5 seconds; tune as needed.
driver.implicitly_wait(5)
# Page-load timeout of 20 seconds, similar to the timeout option of
# requests.get(); driver.get() itself takes no timeout argument.
# driver.get(url) has been seen to block forever without raising, which
# stalls the program; a page-load timeout avoids that.
# NOTE(review): set here, after the first driver.get(), this presumably only
# governs later navigations -- confirm whether it should move above it.
driver.set_page_load_timeout(20)
# Script timeout of 20 seconds.
driver.set_script_timeout(20)
# Page-turning (pagination) commands
firefox
import time
from selenium.webdriver.common.proxy import Proxy, ProxyType

# Route HTTP, FTP and SSL traffic through the same manual proxy.
myProxy = '202.202.90.20:8080'
proxy = Proxy({
    'proxyType': ProxyType.MANUAL,
    'httpProxy': myProxy,
    'ftpProxy': myProxy,
    'sslProxy': myProxy,
    'noProxy': '',
})
profile = webdriver.FirefoxProfile()
if proxy:
    # NOTE(review): get_firefox_profile_with_proxy_set is not defined in this
    # snippet; it must be supplied by the surrounding project.
    profile = get_firefox_profile_with_proxy_set(profile, proxy)
# `user_agent` is expected to be defined by the caller (see the PhantomJS
# example earlier in the article).
if user_agent:
    profile.set_preference("general.useragent.override", user_agent)
# The Firefox driver's keyword for the profile is `firefox_profile`; it has
# no `profile` parameter.
driver = webdriver.Firefox(proxy=proxy, firefox_profile=profile)
driver.get('https://www.baidu.com')
time.sleep(3)
driver.quit()
firefox无头模式
# Start Firefox in headless mode.
from selenium import webdriver
# Create the options object for the new driver instance.
options = webdriver.FirefoxOptions()
# Firefox headless mode.
options.add_argument('--headless')
# NOTE(review): --disable-gpu looks carried over from the Chrome example --
# confirm Firefox needs it.
options.add_argument('--disable-gpu')
# options.add_argument('window-size=1200x600')
# NOTE(review): executable_path normally names the geckodriver binary, not a
# directory -- the file name may have been lost from this path; confirm.
executable_path='./source/'
# NOTE(review): despite its name, driver_path holds the WebDriver instance.
driver_path = webdriver.Firefox(firefox_options=options,executable_path=executable_path)
chrome
# !/usr/bin/python
# -*- coding: utf-8 -*-
from selenium import webdriver

# Build the Chrome options.
options = webdriver.ChromeOptions()
# Chrome headless mode.
options.add_argument('--headless')
options.add_argument('--disable-gpu')
# options.add_argument('window-size=1200x600')
# Set the browser language to Chinese.
options.add_argument('lang=zh_CN.UTF-8')
# Replace the User-Agent header.
options.add_argument('user-agent="Mozilla/5.0 (iPod; U; CPU iPhone OS 2_1 like Mac OS X; ja-jp) AppleWebKit/525.18.1 (KHTML, like Gecko) Version/3.1.1 Mobile/5F137 Safari/525.20"')
# Set the proxy.
# NOTE(review): `proxy` must be a "host:port" string here (it is concatenated
# to a str) and `user_agent` a str; neither is defined in this snippet.
if proxy:
    options.add_argument('proxy-server=' + proxy)
if user_agent:
    options.add_argument('user-agent=' + user_agent)
browser = webdriver.Chrome(chrome_options=options)
# NOTE(review): the host was missing from the URL; httpbin's /get endpoint is
# assumed because it echoes the request headers and origin IP -- confirm.
url = "https://httpbin.org/get?show_env=1"
browser.get(url)
browser.quit()
selenium设置chrome–cookie
# !/usr/bin/python
# -*- coding: utf-8 -*-
from selenium import webdriver

browser = webdriver.Chrome()
url = "https://www.baidu.com/"
# A page must be loaded first: cookies are set for the current domain.
browser.get(url)
# JavaScript that opens a new window.
newwindow = 'window.open("https://www.baidu.com");'
# Delete the existing cookies.
browser.delete_all_cookies()
# Add the cookie that the new window should carry.
browser.add_cookie({'name':'ABC','value':'DEF'})
# Open the new window via JavaScript. The script was defined above but never
# executed, so no window was opened.
browser.execute_script(newwindow)
# Pause so the result can be inspected before the browser closes.
input("查看效果")
browser.quit()
selenium设置chrome-图片不加载
from selenium import webdriver

options = webdriver.ChromeOptions()
# Chrome content setting: the value 2 for 'images' blocks image loading.
prefs = {
    'profile.default_content_setting_values': {
        'images': 2,
    },
}
options.add_experimental_option('prefs', prefs)
browser = webdriver.Chrome(chrome_options=options)
# browser = webdriver.Chrome()
url = "https://image.baidu.com/"
browser.get(url)
# Pause so the page can be checked for images before the browser closes.
input("是否有图")
browser.quit()
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持。
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论