selenium代码实例
# 环境安装:pip install selenium
#  编码流程:
1. 导包:from selenium import webdriver
2. 实例化某一款浏览器对象
3. 自行编写自动化操作代码
# 使用下面的方法,查找指定的元素进行操作
find_element_by_id            根据id查找节点
find_elements_by_name         根据name查找
find_elements_by_xpath        根据xpath查找
find_elements_by_tag_name     根据标签名查找
find_elements_by_class_name   根据class名字查找
# 截屏保存
browser.save_screenshot(r'phantomjs\baidu.png')
# 退出驱动程序
driver.quit()
# Open Baidu automatically and search for "人民币" (RMB).
from selenium import webdriver
from time import sleep

# NOTE(review): this driver path looks truncated in the notes; point it at
# the actual chromedriver executable before running.
bro = webdriver.Chrome(executable_path=r'C:\Users\Administrator\Desktop\chromedriver_')
try:
    # WebDriver.get() needs a complete URL, including the scheme.
    bro.get(url='https://www.baidu.com/')
    sleep(2)
    # Element with id 'kw' -- presumably Baidu's search input; verify on the page.
    text_input = bro.find_element_by_id('kw')
    # send_keys types text into the <input> element.
    text_input.send_keys('人民币')
    sleep(2)
    # Element with id 'su' -- presumably the search button; verify on the page.
    bro.find_element_by_id('su').click()
    sleep(3)
    # Print the source of the current page (the data after rendering).
    print(bro.page_source)
finally:
    # Always shut the browser down, even if one of the steps above fails.
    bro.quit()
# Fetch more movie detail data from the Douban movie ranking page.
from selenium import webdriver
from time import sleep
# Headless Chrome
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# WebDriver.get() needs a complete URL, including the scheme.
url = 'https://movie.douban.com/typerank?type_name=%E6%83%8A%E6%82%9A&type=19&interval_id=100:90&action='
# NOTE(review): this driver path looks truncated in the notes; point it at
# the actual chromedriver executable before running.
bro = webdriver.Chrome(executable_path=r'C:\Users\Administrator\Desktop\chromedriver_', chrome_options=chrome_options)
try:
    bro.get(url)
    sleep(3)
    # Execute JS to scroll down automatically; each sleep gives the page time
    # to react before the next scroll.
    # NOTE(review): the JS statement was missing from the notes; scrolling to
    # the bottom of the document is the reconstructed intent -- confirm.
    scroll_js = 'window.scrollTo(0, document.body.scrollHeight)'
    bro.execute_script(scroll_js)
    sleep(3)
    bro.execute_script(scroll_js)
    sleep(3)
    bro.execute_script(scroll_js)
    sleep(2)
    page_text = bro.page_source
    with open('./douban.html', 'w', encoding='utf-8') as fp:
        fp.write(page_text)
    sleep(1)
finally:
    # Always shut the browser down, even if one of the steps above fails.
    bro.quit()
# Log in to Qzone and save the page source that is shown after login.
from selenium import webdriver
from time import sleep

# NOTE(review): this driver path looks truncated in the notes; point it at
# the actual chromedriver executable before running.
bro = webdriver.Chrome(executable_path=r'C:\Users\Administrator\Desktop\chromedriver_')
# WebDriver.get() needs a complete URL, including the scheme.
url = 'https://qzone.qq.com/'
try:
    bro.get(url=url)
    sleep(2)
    # Switch into a specific iframe; the elements looked up below are searched
    # inside this frame.
    bro.switch_to.frame('login_frame')
    # Presumably switches the form to account/password login -- verify on the page.
    bro.find_element_by_id('switcher_plogin').click()
    sleep(2)
    # NOTE(review): credentials are hard-coded placeholders; load real ones
    # from a safer place (environment, prompt) rather than committing them.
    bro.find_element_by_id('u').send_keys('332424')
    bro.find_element_by_id('p').send_keys('dsaafa020@')
    bro.find_element_by_id('login_button').click()
    sleep(5)
    page_text = bro.page_source
    with open('qq.html', 'w', encoding='utf-8') as fp:
        fp.write(page_text)
finally:
    # Always shut the browser down, even if one of the steps above fails.
    bro.quit()
PhantomJS使用(做无头浏览器)(已被弃用)
PhantomJS的作者ariya在PhantomJS的GitHub页面的issue #15344中写道:由于缺乏积极的贡献,我将会存档该项目。如果将来我们又重新开发这个项目的话,这个项目还会被取出来。因此,所有之前关于PhantomJS 2.5(由 @Vitallium 提起)和PhantomJS 2.1.x(由 @pixiuPL 提起)的计划也会废弃。接下来,为了防止混淆,上述被废弃的版本的源码和二进制包也会被删除。在未来的通知之前,PhantomJS 2.1.1将会是已知最后的稳定版本。
# Fetch more movie detail data from the Douban movie ranking page (PhantomJS).
from selenium import webdriver
from time import sleep

# WebDriver.get() needs a complete URL, including the scheme.
url = 'https://movie.douban.com/typerank?type_name=%E6%83%8A%E6%82%9A&type=19&interval_id=100:90&action='
# executable_path must name the PhantomJS binary itself. A raw string also
# cannot end in a backslash, so the bare directory path was a syntax error.
# NOTE(review): bin\phantomjs.exe is the layout of the official 2.1.1 Windows
# archive -- confirm against the local install.
bro = webdriver.PhantomJS(executable_path=r'C:\Users\Administrator\Desktop\爬虫+数据\day_03_爬虫\phantomjs-2.1.1-windows\bin\phantomjs.exe')
try:
    bro.get(url)
    sleep(3)
    # NOTE(review): the notes only had bare sleeps here; scrolling to the
    # bottom between them is the reconstructed intent (presumably so that
    # more entries get loaded) -- confirm.
    scroll_js = 'window.scrollTo(0, document.body.scrollHeight)'
    bro.execute_script(scroll_js)
    sleep(3)
    bro.execute_script(scroll_js)
    sleep(3)
    bro.execute_script(scroll_js)
    sleep(2)
    page_text = bro.page_source
    with open('./douban.html', 'w', encoding='utf-8') as fp:
        fp.write(page_text)
    sleep(1)
finally:
    # Always shut the browser down, even if one of the steps above fails.
    bro.quit()
# 爬取微信公众号文章
import re
from time import sleep

from lxml import etree
from selenium import webdriver
# Headless Chrome
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')

# NOTE(review): this driver path looks truncated in the notes; point it at
# the actual chromedriver executable before running.
driver_path = r'C:\Users\Administrator\Desktop\chromedriver_'

# History-articles URL captured with Fiddler. The literal is split into
# adjacent string pieces only to keep the lines readable.
url = (
    'https://mp.weixin.qq.com/mp/profile_ext?action=home&__biz=MjM5NzU0MzU0Nw==&scene=124&uin=MzQxNDc2MTIxOQ%3D%3D&key=5fa6'
    '7e91c99877c92cab8f76d9eba741f20e126dcf62c0a8a42af6c159ae91cc6d9b27dd799b89357259a82e1375e1f275a1960f43e003ac9b5baba11703172d08c'
    '866f9bd6aa20534932779237f7fe8&devicetype=Windows+7&version=62080085&lang=zh_CN&a8scene=7&pass_ticket=bB%2BcRIlVVqJKLAN%2FLxVVoWiJ'
    'XecI7JA3Ttwfs%2FWX0zIjxaW1KxSt6Z2wvmXr8tv0&winzoom=1'
)

# Step 1: render the history page and save its source to disk.
bro = webdriver.Chrome(executable_path=driver_path, chrome_options=chrome_options)
try:
    bro.get(url)
    sleep(3)
    sleep(3)
    sleep(3)
    sleep(2)
    page_text = bro.page_source
    # The file name './douban.html' is kept exactly as in the original notes.
    with open('./douban.html', 'w', encoding='utf-8') as fp:
        fp.write(page_text)
    sleep(1)
finally:
    # Always shut the browser down, even if one of the steps above fails.
    bro.quit()

# Step 2: read the saved page back and parse it.
with open('./douban.html', 'r', encoding="utf-8") as f:
    text_html = f.read()
etree_page = etree.HTML(text_html)
# Collect the links of all articles (read from the 'hrefs' attribute).
div_list = etree_page.xpath("//div[@class='weui_media_box appmsg js_appmsg']/@hrefs")

# Step 3: download every article into its own HTML file.
for url in div_list:
    bro = None
    try:
        bro = webdriver.Chrome(executable_path=driver_path, chrome_options=chrome_options)
        bro.get(url)
        page_text = bro.page_source
        t = etree.HTML(page_text)
        text = t.xpath("//h2[@id='activity-name']/text()")[0].strip()
        # Replace characters that Windows does not allow in file names so a
        # title containing e.g. '?' or ':' does not make open() fail.
        text = re.sub(r'[\\/:*?"<>|]', '_', text)
        filename = r"C:\Users\Administrator\Desktop\html\%s.html" % text
        with open(filename, 'w', encoding='utf-8') as fp:
            fp.write(page_text)
        print(page_text)
    except Exception as e:
        # Best effort: report the failure and carry on with the next article.
        print(e)
    finally:
        # Quit the browser started for this article, if it got started at all.
        if bro is not None:
            bro.quit()

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。