2020最新全国省市区街道居委五级区划代码
⼀、说明:
在⽹上百度下载省市区等数据,不是要积分下载就是要钱,我等穷逼既⽆积分⼜⽆钱,那就只有另想办法,学过⼏天python,⽤python将就抓点数据,借鉴别⼈⼀些写法,再修修补补,调试bug,基本上可以运⾏,并将抓取的数据保存⾄MySQL数据库中(抓取之前换成⾃⼰的数据库配置)。开发环境python3.6,下⾯附上源码,对python略懂基本,代码写得不好,如有更好的抓取⽅式和写法,欢迎指导。
⼆、代码
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# author=yjrowe
import sys
import os
import re
from urllib import request
from bs4 import BeautifulSoup
import pymysql
import requests
import time
# 请求页⾯
def http_request(url, charset='utf8'):
try:
print(url)
data = (url, headers=header, timeout=5)
kk = t
kk = kk.decode(charset)
except Exception as e:
print(e)
kk = http_request(url, charset)
return kk
# 获取全国省份和直辖市
def province():
print('爬取省数据中。。。')
t = http_request(url, 'gbk')
province_list = {}
# if t:
# soup = BeautifulSoup(t, 'html.parser')
# for i in soup.find_all(attrs={'class': 'provincetr'}):
# for a in i.find_all('a'):
# id = re.sub("\D", "", a.get('href'))
# province_list[id] = {'id': id, 'name': a.text, 'code': id, 'href': url + a.get('href')}
# #print(province_list[id])
# # exit(province_list)
province_list[0] = {'id': '43', 'name': '湖南省', 'code': '43', 'href': 'v/tjsj/tjbz/tjyqhdmhcxhfdm/2020/43.html'}
insert_data('province', province_list)
return province_list
# 获取省下级市
def city(province_list):
print('爬取市数据中。。。')
city_list = {}
# for i in province_list:
# t = http_request(province_list[i]['href'], 'gbk')
# parent_href = get_parent_url(province_list[i]['href'])
# if not t:
# continue
# soup = BeautifulSoup(t, 'html.parser')
# for v in soup.find_all(attrs={'class': 'citytr'}):
# id = str(v.find_all('td')[0].text)
# city_href = str(v.find_all('td')[1].find_all('a')[0].get('href'))
# city_list[id[0:4]] = {'id': id[0:4], 'name': str(v.find_all('td')[1].text), 'province_id': i, 'code': id,
# 'href': parent_href + city_href}
city_list['4301'] = {'id': '4301', 'name': '长沙市', 'province_id': 0, 'code': '430100000000', 'href': 'v/tjsj/tjbz/tjyqhdmhcxhfdm/2020/43/4301.html'}
#insert_data('city', city_list)
return city_list
# 获取市下级区县
def country(city_list):
print('爬取区县数据中。。。')
county_list = {}
for i in city_list:
t = http_request(city_list[i]['href'], 'gbk')
parent_href = get_parent_url(city_list[i]['href'])
if not t:
continue
soup = BeautifulSoup(t, 'html.parser')
for v in soup.find_all(attrs={'class': 'countytr'}):
id = str(v.find_all('td')[0].text)
if len(v.find_all('td')[1].find_all('a')):
country_href = str(v.find_all('td')[1].find_all('a')[0].get('href'))
else:
continue
county_list[id[0:6]] = {'id': id[0:6], 'name': str(v.find_all('td')[1].text), 'city_id': i, 'code': id,
'href': parent_href + country_href}
insert_data('country', county_list)
return county_list
# 县下级街道、镇
def street(county_list):
print('爬取街道数据中。。。')
street_list = {}
for i in county_list:
t = http_request(county_list[i]['href'], 'gbk')
parent_href = get_parent_url(county_list[i]['href'])
if not t:
continue
soup = BeautifulSoup(t, 'html.parser')
time.sleep(1)
for v in soup.find_all(attrs={'class': 'towntr'}):
id = str(v.find_all('td')[0].text)
street_href = str(v.find_all('td')[1].find_all('a')[0].get('href'))
street_list[id[0:9]] = {'id': id[0:9], 'name': str(v.find_all('td')[1].text), 'country_id': i, 'code': id,
'href': parent_href + street_href}
insert_data('street', street_list)
return street_list
# 社区、街道办
def community(street_list):
print('爬取社区数据中。。。')
community_list = {}
for i in street_list:
t = http_request(street_list[i]['href'], 'gbk')
if not t:
continue
soup = BeautifulSoup(t, 'html.parser')
time.sleep(1)
for v in soup.find_all(attrs={'class': 'villagetr'}):
id = str(v.find_all('td')[0].text)
community_list[id[0:12]] = {'id': id[0:12], 'name': str(v.find_all('td')[2].text), 'street_id': i,
'code': id, 'category': str(v.find_all('td')[1].text)}
insert_data('community', community_list)
return community_list
# 获取上级url
def get_parent_url(href):
# print('上级链接:' + href)
arr = href.split('/')
last_value = arr[len(arr) - 1]
mysql下载要钱吗
href = place(last_value, '')
return href
# 插⼊数据
def insert_data(table, data):
if len(data) == 0:
return False
keys = list(data.keys())
table_keys = '`'+'`,`'.join(list(data[keys[0]].keys()))+'`'
for k in range(0, len(keys)):
table_values = '\''+'\',\''.join(list(data[keys[int(k)]].values()))+'\''
sql = 'INSERT INTO %s (%s) VALUES (%s)' % (table, table_keys, table_values)
# exit(sql)
try:
except Exception as e:
print(e)
connmit()
if__name__ == '__main__':
conn = t(host='139.199.16.31', port=3379, user='root', passwd='Yunzin@yz123', db='yz_cbe', charset='utf8') cursor = conn.cursor()
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
url = 'v/tjsj/tjbz/tjyqhdmhcxhfdm/2020/'
header = {
'Cookie': 'AD_RS_COOKIE=20181108',
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) \ AppleWeb\Kit/537.36 (KHTML, like Gecko)\ '
'Chrome/58.0.3029.110 Safari/537.36'}
provinceList = province()
print(provinceList)
#provinceList = ['v/tjsj/tjbz/tjyqhdmhcxhfdm/2020/43.html']
cityList = city(provinceList)
# print(cityList)
countryList = country(cityList)
print(countryList)
streetList = street(countryList)
print(streetList)
communityList = community(streetList)
print('数据抓取完成')
CREATE TABLE `province` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`name` varchar(255) DEFAULT NULL,
`code` varchar(255) DEFAULT NULL,
`href` varchar(255) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE `city` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`name` varchar(255) DEFAULT NULL,
`province_id` int(11) unsigned DEFAULT NULL,
`code` varchar(255) DEFAULT NULL,
`href` varchar(255) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `city_province_id_foreign` (`province_id`),
CONSTRAINT `city_province_id_foreign` FOREIGN KEY (`province_id`) REFERENCES `province` (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE `country` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`name` varchar(255) DEFAULT NULL,
`city_id` int(11) unsigned DEFAULT NULL,
`code` varchar(255) DEFAULT NULL,
`href` varchar(255) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `country_city_id_foreign` (`city_id`),
CONSTRAINT `country_city_id_foreign` FOREIGN KEY (`city_id`) REFERENCES `city` (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE `street` (
`id` int(11) unsigned NOT NULL AUTO_INCREMENT,
`name` varchar(255) DEFAULT NULL,
`country_id` int(11) unsigned DEFAULT NULL,
`code` varchar(255) DEFAULT NULL,
`href` varchar(255) DEFAULT NULL,
PRIMARY KEY (`id`),
KEY `street_country_id_foreign` (`country_id`),
CONSTRAINT `street_country_id_foreign` FOREIGN KEY (`country_id`) REFERENCES `country` (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE `community` (
`id` varchar(100) NOT NULL,
`name` varchar(255) DEFAULT NULL,
`street_id` int(11) unsigned DEFAULT NULL,
`code` varchar(255) DEFAULT NULL,
`category` varchar(255) DEFAULT NULL COMMENT '城乡分类代码',
PRIMARY KEY (`id`),
KEY `community_street_id_foreign` (`street_id`),
CONSTRAINT `community_street_id_foreign` FOREIGN KEY (`street_id`) REFERENCES `street` (`id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论