aiohttp+多任务异步协程实现异步爬虫。首先构建爬取的网页:这里使用 django 渲染三个简单的页面,每个视图函数都 sleep 2 秒,用来模拟数据爬取的等待时间。django 视图页面:
from django.shortcuts import render
from django.http import HttpResponse
import time
def index(request):
    """Baseline view: respond immediately with a plain-text greeting.

    Unlike the other views, this one has no artificial delay, so it can
    be used as a latency reference point.
    """
    return HttpResponse('hello world')
def one(request):
    """Demo view that sleeps 2 seconds to simulate a slow data fetch."""
    time.sleep(2)
    return HttpResponse('hello one')
def two(request):
    """Demo view that sleeps 2 seconds to simulate a slow data fetch."""
    time.sleep(2)
    return HttpResponse('hello two')
def three(request):
    """Demo view that sleeps 2 seconds to simulate a slow data fetch."""
    time.sleep(2)
    return HttpResponse('hello three')
异步爬虫:
import aiohttp # 使⽤该模块中的ClientSession
import asyncio
import requests
import time
start = time.time()
# aiohttp requires absolute URLs that include the scheme; a bare
# "127.0.0.1:8000/one" raises aiohttp.InvalidURL.
urls = [
    "http://127.0.0.1:8000/one",
    "http://127.0.0.1:8000/two",
    "http://127.0.0.1:8000/three",
]
async def get_page(url):
    """Fetch *url* asynchronously with aiohttp and print the body.

    requests.get() is synchronous and would block the event loop,
    serializing the downloads — an async-capable client (aiohttp)
    must be used inside a coroutine instead.
    """
    print('开始下载......')
    # ClientSession manages the connection pool; both the session and
    # the response are async context managers and must be entered with
    # "async with" so they are released properly.
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            # text() returns the body as str, read() as bytes,
            # json() as a parsed object. Each is a coroutine, so it
            # must be awaited before the data can be used.
            page_text = await response.text()
            print(page_text)
    print('下载结束')
# Wrap every coroutine in a Task so the event loop can run all three
# downloads concurrently; total wall time ≈ max(delay), not sum(delay).
tasks = []
for url in urls:
    coro = get_page(url)
    # The extraction-mangled "sure_future" is asyncio.ensure_future:
    # it schedules the coroutine and returns an awaitable Task.
    task = asyncio.ensure_future(coro)
    tasks.append(task)

loop = asyncio.get_event_loop()
# Block until every task finishes.
loop.run_until_complete(asyncio.wait(tasks))

end = time.time()
print(end - start)
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论