Python Learning Code — Advanced. The code below can be copied directly into a Python file and run.
# 1. Working with files
# Built-in functions and methods:
# open()      open a file
# read()      read the file contents
# readline()  read a single line
# seek()      move the file pointer
# write()     write output
# close()     close the file
# Write to a file; running this creates the txt file
file1 = open('demo.txt', 'w')  # 'demo.txt' is a placeholder; the filename was lost from the original
file1.write("20200202")
file1.close()
# Read the file
file2 = open('demo.txt')
text = file2.read()
print(text)
file2.close()
# Append to the file
file3 = open('demo.txt', 'a')
# a \n in the string starts a new line
file3.write("\n11111")
file3.close()
# Read a single line
file4 = open('demo.txt')
print(file4.readline())
file4.close()
# Read line by line
file5 = open('demo.txt')
for str_1 in file5.readlines():
    print(str_1)
file5.close()
# After a read, the file pointer sits past what was read; seek() moves it back
file6 = open('demo.txt')
print(file6.readline())
# move back to the beginning of the file
print(file6.seek(0))
file6.close()
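# Side note (a sketch, not in the original): the with statement is the
# idiomatic way to work with files; it closes them automatically, even if
# an exception occurs inside the block. 'demo.txt' is the placeholder
# filename used above.
with open('demo.txt') as f7:
    for line in f7:
        print(line.rstrip('\n'))
# f7 is closed here without an explicit close() call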
# 2. Detecting and handling exceptions in Python
try:
    a = 1 / 0
except Exception as e:
    print('Caught exception: %s' % e)
finally:
    print('This statement always runs')
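# Side note (a sketch, not in the original): raise throws an exception
# explicitly, and an optional else clause runs only when no exception occurred.
def check_positive(n):
    if n <= 0:
        raise ValueError('expected a positive number, got %s' % n)
    return n

try:
    check_positive(5)
except ValueError as e:
    print('caught: %s' % e)
else:
    print('no exception was raised')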
# 3. Variable-length arguments in Python
def howLong(first, *other):
    print(first)
    print(other)
howLong('123', '1222', '1111')
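# Side note (a sketch, not in the original): **kwargs is the keyword
# counterpart of *other above; it collects extra keyword arguments into a dict.
def describe(first, *args, **kwargs):
    print(first, args, kwargs)

describe('a', 'b', 'c', color='red')  # a ('b', 'c') {'color': 'red'}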
# 4. Iterators and generators
list1 = [1, 2, 3]
it = iter(list1)
# next() advances the iterator
print(next(it))
print(next(it))
print(next(it))
def frange(start, stop, step):
    x = start
    while x < stop:
        # generator keyword: yield
        yield x
        x += step

for i in frange(10, 12, 0.5):
    print(i)
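# Side note (a sketch, not in the original): a generator expression builds a
# lazy iterator inline, and next() works on it just like on iter() above.
squares = (n * n for n in range(3))
print(next(squares))  # 0
print(next(squares))  # 1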
# 5. Lambda expressions: anonymous functions
add = lambda x, y: x + y
print(add(2, 4))
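# Side note (a sketch, not in the original): lambdas are often used as sort
# keys, e.g. sorting pairs by their second element.
pairs = [(1, 'b'), (2, 'a')]
print(sorted(pairs, key=lambda p: p[1]))  # [(2, 'a'), (1, 'b')]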
# 6. Python built-in functions
a = [1, 2, 34, 5, 6]
# filter(): keep the numbers in a that are greater than 2
print(list(filter(lambda x: x > 2, a)))
# map(): add one to each number in a
print(list(map(lambda x: x + 1, a)))
# multiple lists: add the corresponding elements of a and b
b = [3, 4, 5, 9]
print(list(map(lambda x, y: x + y, a, b)))
# reduce must be imported; here it accumulates the numbers
from functools import reduce
print(reduce(lambda x, y: x + y, [1, 2, 3], 4))
# zip can transpose a matrix; here it swaps a dict's keys and values
dicta = {'aa': 'a', 'bb': 'b', 'cc': 'c'}
dictc = zip(dicta.values(), dicta.keys())
print(list(dictc))
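# Side note (a sketch, not in the original): the matrix transposition the
# comment above mentions is done by unpacking the rows into zip().
matrix = [[1, 2, 3],
          [4, 5, 6]]
print(list(zip(*matrix)))  # [(1, 4), (2, 5), (3, 6)]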
# 7. Python closures: nested functions
def sum(a):
    def add(b):
        return a + b
    return add
num27 = sum(2)
print(num27(4))
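# Side note (a sketch, not in the original): with nonlocal, a closure can
# modify the enclosing variable instead of only reading it.
def counter():
    count = 0
    def increment():
        nonlocal count
        count += 1
        return count
    return increment

c = counter()
print(c(), c())  # 1 2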
# 8. Multithreading in Python
import threading
from threading import current_thread

class Mythread(threading.Thread):
    def run(self):
        print(current_thread().getName(), 'start')
        print('run')
        print(current_thread().getName(), 'stop')

t1 = Mythread()
t1.start()
t1.join()  # wait for the thread to finish (thread synchronization)
print(current_thread().getName(), 'end')
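# Side note (a sketch, not in the original): join() above only waits for
# completion; when threads share mutable state, a Lock keeps their updates
# from interleaving.
lock = threading.Lock()
shared = []

def append_safely(value):
    with lock:  # only one thread may run this block at a time
        shared.append(value)

threads = [threading.Thread(target=append_safely, args=(i,)) for i in range(3)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(shared)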
# 9. Regular expressions with re
# .      matches any single character
# ^      anchors the match at the start of the string
# $      anchors the match at the end of the string
# *      the preceding character appears 0 to n times
# +      the preceding character appears 1 to n times
# ?      the preceding character appears 0 or 1 times
# {m}    the preceding character appears exactly m times
# {m,n}  the preceding character appears m to n times
# []     matches any single character listed inside the brackets
# |      matches the pattern on the left or the right
# \d     matches a digit
# \D     matches a non-digit
# \s     matches a whitespace character
# ()     creates a group
import re

p = re.compile('.{3}')  # any character, three times
print(p.match('d'))
p1 = re.compile('jpg$')  # strings ending in jpg
print(p1.match('d'))
p2 = re.compile('ca*')  # 'c' followed by zero or more 'a's
print(p2.match('cat'))
p3 = re.compile('a{4}')  # 'a' exactly four times
print(p3.match('caaaat'))
p4 = re.compile('c[bcd]t')  # any one of b, c, d between c and t
print(p4.match('cat'))
# grouping
p5 = re.compile(r'(\d+)-(\d+)-(\d+)')
print(p5.match('2019-02-02'))  # match a date
print(p5.match('2019-02-02').group(1))  # the year group
year, month, day = p5.match('2019-02-02').groups()  # all three groups
print(year, month, day)
# match anchors at the start of the string; search scans the whole string
print(p5.match('aaa2019-02-02'))
print(p5.search('aaa2019-02-02'))
# sub: find and replace
phone = '123-456-789 # this is a phone number'
print(re.sub(r'#.*$', '', phone))  # remove everything from the hash onward
print(re.sub(r'\D', '', phone))  # remove all non-digit characters
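# Side note (a sketch, not in the original): groups can also be named with
# (?P<name>...), which reads better than the numeric group(1) above.
p6 = re.compile(r'(?P<year>\d+)-(?P<month>\d+)-(?P<day>\d+)')
m6 = p6.match('2019-02-02')
print(m6.group('year'), m6.group('month'), m6.group('day'))  # 2019 02 02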
# 10. Date and time libraries
import time
print(time.time())  # seconds since the 1970 epoch
print(time.localtime())
print(time.strftime('%Y-%m-%d %H:%M:%S'))
import datetime
# datetime is used for date arithmetic
print(datetime.datetime.now())
new_time = datetime.timedelta(minutes=10)
print(datetime.datetime.now() + new_time)  # the time ten minutes from now
one_day = datetime.datetime(2019, 9, 9)
new_day = datetime.timedelta(days=10)
print(one_day + new_day)
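# Side note (a sketch, not in the original): strptime is the inverse of
# strftime above; it parses a string into a datetime object.
parsed = datetime.datetime.strptime('2019-09-09 10:30:00', '%Y-%m-%d %H:%M:%S')
print(parsed.year, parsed.month, parsed.day)  # 2019 9 9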
# 11. Collecting web data with urllib
from urllib import request

url = 'http://www.baidu.com'
response = request.urlopen(url, timeout=1)
# print(response.read().decode('utf-8'))
# 12. GET and POST requests
from urllib import parse
from urllib import request

data = bytes(parse.urlencode({'world': 'hello'}), encoding='utf8')
# print(data)
response = request.urlopen('http://httpbin.org/post', data=data)  # httpbin.org assumed; the host was stripped from the original
# print(response.read().decode('utf-8'))
import urllib
import socket

try:
    response2 = request.urlopen('http://httpbin.org/get', timeout=1)
    # print(response2.read())
except urllib.error.URLError as e:
    if isinstance(e.reason, socket.timeout):
        print("time out")
# 13. Using the requests library
# GET request
import requests

url2131 = 'http://httpbin.org/get'
data2131 = {'key': 'value', 'abc': 'xyz'}
response2131 = requests.get(url2131, params=data2131)  # sent as query parameters
# print(response2131.text)
# POST request
url2132 = 'http://httpbin.org/post'
data2132 = {'key': 'value', 'abc': 'xyz'}
response2132 = requests.post(url2132, data2132)
# print(response2132.json())
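# Side note (a sketch, not in the original): the response object also exposes
# the HTTP status, and raise_for_status() turns 4xx/5xx codes into exceptions.
print(response2132.status_code)  # e.g. 200
response2132.raise_for_status()  # raises requests.HTTPError on failure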
# 14. Scraping links with regular expressions
# import requests
# import re
content = requests.get('/discoveryPage/hot-人像').text  # note: the host is missing from the original URL
# print(content)
patter2141 = re.compile(r'<a href="(.*?)".*?title">(.*?)</div>', re.S)
results2141 = re.findall(patter2141, content)
# print('ssssss', results2141)
for result2141 in results2141:
    url2141, name2141 = result2141
    # print(url2141, re.sub(r'\s', '', name2141))
# 15. Installing and using Beautiful Soup for crawling
# pip3 install bs4
from bs4 import BeautifulSoup

soup = BeautifulSoup(content, 'lxml')
# print(soup.prettify())  # pretty-print the document
# print(soup.title)  # the <title> tag
# print(soup.title.string)  # the title text
# print(soup.p)  # the first <p> tag
# print(soup.a)  # the first <a> tag
# print(soup.find(id='link3'))  # the element with id='link3'
# print the link of every <a> tag
# for link in soup.find_all('a'):
#     print(link.get('href'))
# print(soup.get_text())  # all the text in the document
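# Side note (a sketch, not in the original): select() accepts CSS selectors
# as an alternative to the find()/find_all() calls above.
# print(soup.select('div a'))   # <a> tags nested inside <div> tags
# print(soup.select('#link3'))  # the element with id="link3"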
# 16. Crawling page titles
# from bs4 import BeautifulSoup
# import requests
headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Connection": "close",
    "Cookie": "_gauges_unique_hour=1; _gauges_unique_day=1; _gauges_unique_month=1; _gauges_unique_year=1; _gauges_unique=1",
    "Referer": "www.infoq.com",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER"
}
url2161 = 'https://www.infoq.com/news/'
# fetch the complete page content
def craw(url2162):
    response2162 = requests.get(url2162, headers=headers)
    print(response2162.text)
# craw(url2161)
# fetch the news titles
def craw2(url2163):
    response2163 = requests.get(url2163, headers=headers)
    soup2163 = BeautifulSoup(response2163.text, 'lxml')
    for title_href in soup2163.find_all('div', class_='items__content'):
        print([title.get('title')
               for title in title_href.find_all('a') if title.get('title')])
# craw2(url2161)
# pagination: fetch pages 15, 30, 45
# for i in range(15, 46, 15):
#     url2164 = 'https://www.infoq.com/news/' + str(i)
#     # print(url2164)
#     craw2(url2164)
# 17. Crawling and downloading images
from bs4 import BeautifulSoup
import requests
import os
import shutil

headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Connection": "close",
    "Cookie": "_gauges_unique_hour=1; _gauges_unique_day=1; _gauges_unique_month=1; _gauges_unique_year=1; _gauges_unique=1",
    "Referer": "www.infoq.com",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER"
}
url = 'https://www.infoq.com/presentations'
# Download an image
# The requests library wraps the complex interfaces in a friendlier HTTP
# client, but it offers no dedicated file-download function; that is done by
# setting the special stream parameter on the request. With stream=True the
# request downloads only the HTTP response headers and keeps the connection
# open; the response body is not downloaded until the content attribute is
# accessed.
def download_jpg(image_url, image_localpath):
    response = requests.get(image_url, stream=True)
    if response.status_code == 200:
        with open(image_localpath, 'wb') as f:
            response.raw.decode_content = True
            shutil.copyfileobj(response.raw, f)  # stream the body into the file
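# Side note (a sketch, not in the original): iter_content() is a common
# alternative to shutil.copyfileobj that writes the body in fixed-size chunks.
def download_jpg_chunked(image_url, image_localpath):
    response = requests.get(image_url, stream=True)
    if response.status_code == 200:
        with open(image_localpath, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)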
# fetch the presentation images
def craw3(url):
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'lxml')
    for pic_href in soup.find_all('div', class_='items__content'):
        for pic in pic_href.find_all('img'):
            imgurl = pic.get('src')
            dir = os.path.abspath('.')
            filename = os.path.basename(imgurl)
            imgpath = os.path.join(dir, filename)
            print('Downloading %s' % imgurl)
            download_jpg(imgurl, imgpath)
# craw3(url)
# pagination
j = 0
for i in range(12, 37, 12):
    url = 'https://www.infoq.com/presentations' + str(i)
    j += 1
    print('Page %d' % j)
    craw3(url)