# 用爬虫爬猫眼电影 top100 的完整代码:这是今天写的一个爬取猫眼电影 top100 的完整代码。
# coding:utf-8
import json
import requests
from bs4 import  BeautifulSoup
def get_one_page(url):
    """Download one page and return its HTML text.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The decoded response body when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status or any request-level failure).
    """
    # A browser-like User-Agent is sent with every request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:59.0) Gecko/20100101 Firefox/59.0'}
    try:
        # A timeout keeps the crawl from hanging forever on a stalled connection.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        # Best-effort fetch: network problems are reported as "no page".
        return None
    if response.status_code == 200:
        return response.text
    return None
def parse_one_page(html):
    """Extract the movie entries from one ranking page.

    Args:
        html: HTML text of a board page, or ``None``/empty when the
            download failed.

    Returns:
        A list of dicts, one per ``<dd>`` entry, with the keys ``rate``,
        ``figure``, ``movie``, ``cast``, ``releasetime`` and ``mark``.
        An empty list is returned when there is no HTML or the page has
        no ``<dl class="board-wrapper">`` container.
    """
    if not html:
        return []
    # html is already decoded text, so no from_encoding hint is passed
    # (bs4 ignores it for str input and emits a warning).
    soup = BeautifulSoup(html, 'html.parser')
    board_wrapper = soup.find('dl', class_='board-wrapper')
    if board_wrapper is None:
        return []

    movies = []
    for entry in board_wrapper.find_all('dd'):
        # The first <i> inside the entry is stored under the 'rate' key.
        rate = entry.find('i').get_text(strip=True)
        figure = entry.find('a', class_='image-link').find('img', class_='board-img')['data-src']
        movie = entry.find('p', class_='name').find('a', attrs={'data-act': 'boarditem-click'}).get_text(strip=True)
        # [3:] / [5:] drop a fixed-length leading label from the text;
        # presumably the "cast:" / "release time:" captions -- verify against the page.
        cast = entry.find('p', class_='star').get_text(strip=True)[3:]
        releasetime = entry.find('p', class_='releasetime').get_text(strip=True)[5:]
        # The score is split across two <i> tags (integer part and fraction part).
        mark = (entry.find('i', class_='integer').get_text(strip=True)
                + entry.find('i', class_='fraction').get_text(strip=True))
        movies.append({
            'rate': rate,
            'figure': figure,
            'movie': movie,
            'cast': cast,
            'releasetime': releasetime,
            'mark': mark,
        })
    return movies
def write_content(content, path='D:/猫眼电影5.txt'):
    """Append each item of *content* to a text file as one JSON line.

    Args:
        content: Iterable of JSON-serialisable objects (the dicts produced
            by the page parser).
        path: Destination file; opened in append mode with UTF-8 encoding.
            Defaults to the location the script has always written to.
    """
    # The with-statement closes the file, so no explicit close() is needed.
    with open(path, 'a', encoding='utf-8') as f:
        for item in content:
            # ensure_ascii=False keeps non-ASCII text readable in the output.
            f.write(json.dumps(item, ensure_ascii=False) + '\n')
if __name__ == '__main__':
    # Ten requests, stepping the offset query parameter by 10 each time
    # (offset = 0, 10, ..., 90).
    for page in range(10):
        # The URL needs an explicit scheme and host; requests rejects a bare path.
        html = get_one_page('https://maoyan.com/board/4?offset=%s' % (page * 10))
        if html is None:
            # Download failed for this page; skip it rather than crash the parser.
            continue
        content = parse_one_page(html)
        write_content(content)

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。