# Example walkthrough: collecting (scraping) Douyin videos with Python.
import requests
import json
import re
import os
from pprint import pprint as pp
import queue
class DouYin:
    """Collect and download the videos posted by one Douyin user.

    Typical use: build the object from a user share link, call
    ``user_info()`` to queue every ``(video_name, video_url)`` pair, then
    call ``get_download()`` to save the queued videos under
    ``download/<user_id>`` in the current working directory.
    """

    # Request headers sent with the API and redirect-resolution requests.
    header = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'cache-control': 'max-age=0',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
    }

    def __init__(self, url=None):
        """Resolve *url* and extract the user id from it.

        Args:
            url: A user page URL, or a ``v.douyin`` short share link.

        Raises:
            ValueError: If *url* is empty or no ``user/<id>?`` segment can
                be found in the resolved address.
        """
        if not url:
            raise ValueError('a Douyin user URL is required')
        requests.packages.urllib3.disable_warnings()
        # The session is created first because get_RealAddress() uses it.
        self.session = requests.Session()
        self.url = self.get_RealAddress(url)
        # Endpoint that lists a user's posted videos; {0} is the query string.
        # NOTE(review): scheme and TLD were missing in the reviewed text and
        # have been restored by inference - confirm against the live API.
        self.user_video_url = 'https://www.douyin.com/aweme/v1/aweme/post/?{0}'
        match = re.search(r'user/(.*)\?', self.url)
        if match is None:
            raise ValueError('cannot find a user id in %r' % self.url)
        self.user_id = match.group(1)  # user id
        self.target_folder = ''  # directory the videos are written to
        self.queue = queue.Queue()  # pending (video_name, video_url) pairs

    def user_info(self):
        """Queue every video of the user as ``(video_name, video_url)``.

        Creates the target folder, obtains the request signature from the
        external ``fuck.js`` node script, then pages through the video list
        until the API stops reporting ``has_more == 1``.
        """
        self.mkdir_dir()
        user_video_params = {
            'user_id': str(self.user_id),
            'count': '21',
            'max_cursor': '0',
            'aid': '1128',
            '_signature': self._fetch_signature(),
        }
        while True:
            query = '&'.join(
                key + '=' + value for key, value in user_video_params.items())
            # verify=False disables TLS certificate checks (kept from the
            # original behaviour); NOTE(review): consider enabling them.
            response = self.session.get(
                url=self.user_video_url.format(query), headers=self.header,
                timeout=10, verify=False)
            content = json.loads(response.content.decode('utf-8'))
            for video in self._parse_aweme_list(content):
                self.queue.put(video)
            if content.get('has_more') != 1:  # 1 means another page exists
                break
            next_cursor = content.get('max_cursor')
            # A missing or unchanged cursor would request the same page
            # forever, so stop instead of looping.
            if not next_cursor or str(next_cursor) == user_video_params['max_cursor']:
                break
            user_video_params['max_cursor'] = str(next_cursor)

    def _fetch_signature(self):
        """Return the first output line of ``node fuck.js <user_id>``."""
        import subprocess

        # An argument list (no shell) keeps URL-derived text out of a shell
        # command line.
        output = subprocess.check_output(
            ['node', 'fuck.js', str(self.user_id)], universal_newlines=True)
        lines = output.splitlines()
        if not lines or not lines[0].strip():
            raise RuntimeError('fuck.js produced no signature')
        # Strip the line ending so it does not leak into the query string.
        return lines[0].strip()

    @staticmethod
    def _parse_aweme_list(content):
        """Extract ``(video_name, video_url)`` pairs from one API response.

        Entries without a playable URL are skipped. ``playwm`` in the URL is
        replaced by ``play``, as the original code did.
        """
        videos = []
        for aweme in content.get('aweme_list') or []:
            share_info = aweme.get('share_info') or {}
            play_addr = (aweme.get('video') or {}).get('play_addr') or {}
            url_list = play_addr.get('url_list') or []
            if not url_list:
                continue
            videos.append((share_info.get('share_desc'),
                           url_list[0].replace('playwm', 'play')))
        return videos

    @staticmethod
    def _safe_file_name(name):
        """Turn a video title into a name that is valid on common file systems."""
        cleaned = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', str(name or ''))
        return cleaned.strip(' .') or 'untitled'

    # Download the videos
    def get_download(self):
        """Download every queued video, retrying each one up to 10 times.

        Returns once the queue is empty. Videos whose target file already
        exists are skipped.
        """
        while True:
            try:
                video_name, video_url = self.queue.get_nowait()
            except queue.Empty:
                return  # nothing left to download
            file_name = self._safe_file_name(video_name) + '.mp4'
            file_path = os.path.join(self.target_folder, file_name)
            if os.path.isfile(file_path):
                continue
            print('download %s from %s.\n' % (file_name, video_url))
            for _ in range(10):
                try:
                    response = self.session.get(
                        url=video_url, stream=True, timeout=10,
                        verify=False)  # streamed download
                    response.raise_for_status()
                    with open(file_path, 'wb') as f:
                        for chunk in response.iter_content(1024):
                            f.write(chunk)
                except (requests.RequestException, OSError):
                    print('下载失败')
                    # Remove a partial file so a later run does not treat
                    # it as a finished download.
                    if os.path.isfile(file_path):
                        os.remove(file_path)
                else:
                    print('下载成功')
                    break

    # Create the target folder
    def mkdir_dir(self):
        """Create ``<cwd>/download/<user_id>`` and store it in ``target_folder``."""
        self.target_folder = os.path.join(
            os.getcwd(), 'download', str(self.user_id))
        # makedirs also creates the missing 'download' parent directory.
        os.makedirs(self.target_folder, exist_ok=True)

    # Resolve a short link to the full address
    def get_RealAddress(self, url):
        """Return the redirect target of a ``v.douyin`` short link.

        URLs that do not contain ``v.douyin`` are returned unchanged. If the
        server sends no ``Location`` header, *url* itself is returned.
        """
        if url.find('v.douyin') < 0:
            return url
        # Redirects are not followed so the Location header can be read.
        response = self.session.get(
            url=url, headers=self.header, allow_redirects=False)
        return response.headers.get('Location', url)
if __name__ == '__main__':
    # Resolve the share link, queue every video of that user, then download
    # them. NOTE(review): the scheme and TLD of the link were missing in the
    # reviewed text and are restored by inference - confirm the real link.
    douyin = DouYin(url='http://v.douyin.com/J2B9Sk/')
    douyin.user_info()
    douyin.get_download()
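# Copyright notice: the content of this site comes from the internet and is
# for demonstration only; do not use it for commercial or other illegal
# purposes. If your rights have been infringed, contact us via QQ 729038198
# and we will remove the content within 24 hours.
# (Page footer: "Post a comment")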