从新浪微博上获取疫情最新信息
# -*- coding:utf-8 -*-
"""
Author: Edgar
Created time:2/1/2020 12:06 PM
爬取新浪微博中的相关信息
"""
import os
import json
import requests
import pymysql
class Virus(object):
    """Scrape epidemic statistics from Sina's mobile news API into MySQL.

    The class downloads one JSON document with per-province, per-city and
    per-country figures, plus a paginated news timeline, and stores them in
    four tables: ``virus_province``, ``virus_city``, ``virus_world`` and
    ``virus_timeline``.

    Table names are lowercase everywhere so that CREATE, INSERT and TRUNCATE
    agree on servers where MySQL table names are case-sensitive.
    """

    # NOTE(review): credentials are hard-coded exactly as in the original
    # script; move them to configuration before using this anywhere shared.
    _DB_CONFIG = {
        "host": "localhost",
        "user": "root",
        "password": "Edgar",
        "database": "virus",
    }

    # Seconds to wait for the remote API before treating the call as failed.
    _TIMEOUT = 10

    def __init__(self):
        super(Virus, self).__init__()
        # NOTE(review): the scheme and the ".cn" suffix were missing from the
        # recovered source ("interface.sina/..."); confirm the restored host.
        self.url = "https://interface.sina.cn/news/wap/fymap2020_data.d.json"
        self.header = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36"}

    @staticmethod
    def _run_sql(statements):
        """Execute ``(sql, params)`` pairs on a fresh connection and commit.

        ``params`` may be ``None`` for statements without placeholders. The
        connection is always closed, even when a statement raises.
        """
        connect = pymysql.connect(**Virus._DB_CONFIG)
        try:
            with connect.cursor() as cursor:
                for sql, params in statements:
                    cursor.execute(sql, params)
            connect.commit()
        finally:
            connect.close()

    def get_json(self):
        """Download and decode the statistics JSON.

        Returns:
            The decoded JSON document, or ``None`` when the request fails,
            the server answers with an error status, or the body is not JSON.
            A failure message is printed in that case.
        """
        try:
            # The headers must be passed by keyword: the second positional
            # argument of requests.get is ``params``, not ``headers``.
            response = requests.get(
                self.url, headers=self.header, timeout=self._TIMEOUT)
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError):
            print("获取json⽂件失败")
            return None

    def download_json(self, filename='data.json'):
        """Save the statistics JSON to ``filename``.

        If the file already exists the user is asked (y/n) whether to
        overwrite it; any other answer repeats the question. Nothing is
        written when the download failed.
        """
        json_ = self.get_json()
        if json_ is None:
            # get_json already reported the failure; do not write "null".
            return
        print(json_)

        if os.path.exists(filename):
            while True:
                answer = input("该⽬录已经存在⽂件 %s,是否删除该⽂件(y/n):  " % filename)
                if answer in ('y', 'Y'):
                    break
                if answer in ('n', 'N'):
                    return
                print("输⼊错误,请重新输⼊: ")

        with open(filename, "w") as file:
            json.dump(json_, file)

    @staticmethod
    def create_table():
        """Create the four storage tables if they do not exist yet."""
        Virus._run_sql([
            # One row per province; ``city`` holds a comma-joined name list.
            ("CREATE TABLE IF NOT EXISTS virus_province("
             "name VARCHAR(60) NOT NULL, ename VARCHAR(20), value VARCHAR(20), "
             "susNum VARCHAR(20), deathNum VARCHAR(20), cureNum VARCHAR(20), "
             "city TEXT)", None),
            # One row per city, tagged with its province.
            ("CREATE TABLE IF NOT EXISTS virus_city("
             "province VARCHAR(20), name VARCHAR(20) NOT NULL, "
             "conNum VARCHAR(20), susNum VARCHAR(20), cureNum VARCHAR(20), "
             "deathNum VARCHAR(20))", None),
            # Worldwide figures, one row per country.
            ("CREATE TABLE IF NOT EXISTS virus_world("
             "name VARCHAR(20), value VARCHAR(20), susNum VARCHAR(20), "
             "deathNum VARCHAR(20), cureNum VARCHAR(20))", None),
            # News timeline entries.
            ("CREATE TABLE IF NOT EXISTS virus_timeline("
             "url VARCHAR(100), title VARCHAR(200), media VARCHAR(40), "
             "date VARCHAR(30))", None),
        ])

    @staticmethod
    def insert_city(city):
        """Insert one city record (a dict) into ``virus_city``.

        Reads the keys ``province``, ``name``, ``conNum``, ``susNum``,
        ``cureNum`` and ``deathNum``; missing keys are stored as NULL.
        """
        sql = ("INSERT INTO virus_city"
               "(province, name, conNum, susNum, cureNum, deathNum) "
               "VALUES (%s, %s, %s, %s, %s, %s)")
        keys = ("province", "name", "conNum", "susNum", "cureNum", "deathNum")
        # Values come from the network, so let the driver escape them.
        Virus._run_sql([(sql, tuple(city.get(key) for key in keys))])

    @staticmethod
    def insert_province(province):
        """Insert one province record (a dict) into ``virus_province``.

        The names of the entries under the ``city`` key are stored as a
        single comma-separated string.
        """
        sql = ("INSERT INTO virus_province"
               "(name, ename, value, susNum, deathNum, cureNum, city) "
               "VALUES (%s, %s, %s, %s, %s, %s, %s)")
        keys = ("name", "ename", "value", "susNum", "deathNum", "cureNum")
        city_names = ",".join(
            str(entry.get("name")) for entry in (province.get("city") or []))
        params = tuple(province.get(key) for key in keys) + (city_names,)
        Virus._run_sql([(sql, params)])

    @staticmethod
    def insert_world(world):
        """Insert one country record (a dict) into ``virus_world``."""
        sql = ("INSERT INTO virus_world"
               "(name, value, susNum, deathNum, cureNum) "
               "VALUES (%s, %s, %s, %s, %s)")
        keys = ("name", "value", "susNum", "deathNum", "cureNum")
        Virus._run_sql([(sql, tuple(world.get(key) for key in keys))])

    @staticmethod
    def insert_timeline(data):
        """Insert one timeline entry (a dict) into ``virus_timeline``."""
        sql = ("INSERT INTO virus_timeline(url, title, media, date) "
               "VALUES (%s, %s, %s, %s)")
        keys = ("url", "title", "media", "date")
        Virus._run_sql([(sql, tuple(data.get(key) for key in keys))])

    def upload_data(self):
        """Download the statistics and store every record in MySQL.

        Inserts each province and its cities, then each country, and finally
        pulls the news timeline. Does nothing when the download failed.
        """
        data_json = self.get_json()
        if data_json is None:
            return
        payload = data_json.get("data") or {}

        for province in payload.get("list") or []:
            self.insert_province(province)
            for city in province.get("city") or []:
                # City records do not carry their province; attach it here.
                city["province"] = province.get("name")
                self.insert_city(city)

        for world in payload.get("worldlist") or []:
            self.insert_world(world)

        self.get_timeline()

    def refresh_data(self):
        """Empty all four tables and reload them from the API."""
        self._run_sql([
            ("TRUNCATE TABLE virus_province;", None),
            ("TRUNCATE TABLE virus_city;", None),
            ("TRUNCATE TABLE virus_world;", None),
            ("TRUNCATE TABLE virus_timeline;", None),
        ])
        self.upload_data()

    def get_timeline(self):
        """Fetch the news timeline page by page and store every entry.

        Paging starts at 0 and stops at the first failed request or the
        first page that carries no entries.
        """
        # NOTE(review): the recovered source had this URL truncated
        # ("interface.sina/...", "news.sina/zt_ d/..."); confirm the restored
        # hosts and the embedded "http://" before relying on it.
        url = ("https://interface.sina.cn/wap_api/wap_std_subject_feed_list.d.json"
               "?component_id=_conf_13|wap_zt_std_theme_timeline|"
               "http://news.sina.cn/zt_d/yiqing0121&page={}")
        count = 0
        while True:
            try:
                response = requests.get(
                    url.format(count), headers=self.header,
                    timeout=self._TIMEOUT)
                response.raise_for_status()
                body = response.json()
            except (requests.RequestException, ValueError):
                return

            result = body.get("result") or {}
            data = (result.get("data") or {}).get("data")
            if not data:
                return

            count += 1
            for entry in data:
                self.insert_timeline(entry)
if __name__ == '__main__':
    virus = Virus()
    # Uncomment the step(s) you want to run:
    # virus.create_table()    # one-time setup of the MySQL tables
    # virus.download_json()   # save the raw statistics to data.json
    # virus.upload_data()     # load provinces, cities, countries and timeline
    # virus.get_timeline()    # load only the news timeline
爬取部分内容如下:
virus_world:
name value susNum deathNum cureNum
中国 1441119544304328
德国 8300
西班牙 1000
俄罗斯 2000
柬埔寨 1000
印度 2000
virus_city:
province name conNum susNum cureNum deathNum
北京 海淀区 41000
北京 怀柔区 1000
北京 丰台区 16000
北京 大兴区 22000
北京 东城区 3000
北京 昌平区 12000
北京 西城区 22000
北京 朝阳区 35000
北京 石景山区 5000
最新代码见

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。