python批量保存网页为pdf_利用Python把网页内容转换为pdf格式文件,批量下载……
使用Google浏览器的打印命令时,保存下来的pdf文件中包含网页中的所有内容(左右边框和广告等),
而我只想把当前网页中的主体内容转成pdf格式的文件保存下来。
操作说明:
填写URL及提取条件(在浏览器中按F12,查看正文所在元素的class),预览,打印。
python代码说明:
默认可不输入提取条件,对于没有下载过的网站,会出现提示;对于已下载过的网站,有保存记录,自动关联当前网站的提取条件。
python代码如下:
import win32api
import win32con
import requests
from bs4 import BeautifulSoup
import webbrowser
import tkinter
from tkinter import filedialog
import pdfkit
# 预览
def take_body():
    """Download the page named in the URL field and preview its main content.

    Reads the URL and the extraction condition (a CSS class name) from the
    ``var_url`` / ``var_body`` Tk variables, fetches the page, takes the first
    element carrying that class, writes it to ``html_test.html`` in the
    current working directory and opens that file in the default browser.

    The values ``url``, ``body_class``, ``headers`` and ``title`` are stored
    as module globals because ``htmltopdf`` reads them afterwards.

    NOTE(review): the published listing had lost indentation and several
    tokens. The following parts are reconstructions and should be confirmed
    against the author's original script: the ``.get()`` calls on the Tk
    variables, ``requests.get``, ``BeautifulSoup(response.text, ...)``, the
    expression used for ``title``, the ``.encode("utf-8")`` call, and the
    name of the per-site record file (it was an empty string in the listing).
    """
    import ast
    import pathlib

    global url
    global body_class
    global headers
    global title

    url = var_url.get().strip()
    body_class = var_body.get().strip()

    # The site key is the host part of the URL; without "scheme://host" the
    # original ``url.split("/")[2]`` raised IndexError.
    url_parts = url.split("/")
    if len(url_parts) < 3 or not url_parts[2]:
        win32api.MessageBox(
            0, "请输入完整的网址(以 http:// 或 https:// 开头)", "提示", win32con.MB_OK
        )
        return
    site = url_parts[2]

    # Request the URL with browser-like headers.
    # NOTE(review): "br" is advertised below; confirm that the installed
    # requests/urllib3 can decode Brotli, otherwise drop it from the list.
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate, br",
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36"
        ),
    }
    try:
        response = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException as exc:
        win32api.MessageBox(0, "无法打开该网址:" + str(exc), "提示", win32con.MB_OK)
        return
    soup = BeautifulSoup(response.text, "html.parser")

    # Page title, later used by htmltopdf as the suggested file name.
    title_tag = soup.title
    title = title_tag.get_text(strip=True) if title_tag is not None else ""
    if not title:
        title = "untitled"

    # Load the saved {site: class name} record. A missing or unreadable file
    # simply means that no site has been converted yet. literal_eval parses
    # the ``str(dict)`` format without executing arbitrary code.
    rules_path = "site_rules.txt"  # NOTE(review): original file name unknown
    try:
        with open(rules_path, "r", encoding="utf-8") as rules_file:
            saved_rules = ast.literal_eval(rules_file.read())
    except (FileNotFoundError, SyntaxError, ValueError):
        saved_rules = {}
    if not isinstance(saved_rules, dict):
        saved_rules = {}

    # A saved condition for this site takes precedence over the typed one
    # (same precedence as the original listing).
    if site in saved_rules:
        body_class = saved_rules[site]
    elif body_class == "":
        win32api.MessageBox(
            0, "未从在该网站下载过文档,添加提取条件", "提示", win32con.MB_OK
        )
        return  # nothing to extract with, so there is nothing to preview

    # Extract the main content: the first element with the requested class.
    body = soup.find(class_=body_class)
    if body is None:
        win32api.MessageBox(
            0, "页面中未找到符合提取条件的内容", "提示", win32con.MB_OK
        )
        return

    # Write the extracted fragment and open it in the browser for preview.
    preview_path = pathlib.Path("html_test.html")
    with open(preview_path, "wb") as preview_file:
        preview_file.write(str(body).encode("utf-8"))
    # Open the file that was just written instead of a hard-coded E:\ path.
    webbrowser.open(preview_path.resolve().as_uri(), new=0, autoraise=True)


# HTML to PDF
def htmltopdf():
    """Convert the previewed ``html_test.html`` to a PDF chosen by the user.

    Asks for a destination with a save dialog (pre-filled with the page
    title), runs ``pdfkit.from_file`` on ``html_test.html`` and then records
    the extraction condition used for this site so that it can be reused.

    Reads the module globals ``title``, ``url`` and ``body_class`` that
    ``take_body`` sets, so a preview must have been produced first.

    NOTE(review): the name of the per-site record file was lost in the
    published listing (it was an empty string); ``site_rules.txt`` is a
    placeholder to be confirmed against the author's original script.
    """
    import ast
    import re

    # Guard against "print" being clicked before any successful preview;
    # the original raised NameError in that case.
    state = globals()
    page_title = state.get("title")
    page_url = state.get("url")
    used_class = state.get("body_class")
    if not page_title or not page_url or not used_class:
        win32api.MessageBox(0, "请先预览网页内容", "提示", win32con.MB_OK)
        return

    # Page titles may contain characters that are not allowed in file names.
    file_name = re.sub(r'[\\/:*?"<>|\r\n]+', "_", page_title) + ".pdf"
    types = [("pdf文件", "*.pdf")]
    dest_path = filedialog.asksaveasfilename(
        initialfile=file_name, filetypes=types, defaultextension=".pdf"
    )
    if not dest_path:
        return  # the dialog was cancelled

    options = {"encoding": "utf-8"}
    pdfkit.from_file("html_test.html", dest_path, options=options)

    # Update the saved {site: class name} record. A missing or unreadable
    # file is treated as an empty record. literal_eval parses the
    # ``str(dict)`` format without executing arbitrary code.
    rules_path = "site_rules.txt"  # NOTE(review): original file name unknown
    try:
        with open(rules_path, "r", encoding="utf-8") as rules_file:
            saved_rules = ast.literal_eval(rules_file.read())
    except (FileNotFoundError, SyntaxError, ValueError):
        saved_rules = {}
    if not isinstance(saved_rules, dict):
        saved_rules = {}

    saved_rules[page_url.split("/")[2]] = used_class
    with open(rules_path, "w", encoding="utf-8") as rules_file:
        rules_file.write(str(saved_rules))
# Build the main window.
root = tkinter.Tk()
root.title("HtmlToPdf")
# All widgets are positioned with place(), which is not expected to grow the
# window around them, so give the window an explicit size that contains the
# right-most button (x=250).
root.geometry("360x140")

# Tk variables read by take_body: the page URL and the extraction condition.
var_url = tkinter.StringVar()
var_body = tkinter.StringVar()

# Row 1: URL input.
tkinter.Label(root, text="URL:").place(x=10, y=10)
tkinter.Entry(root, textvariable=var_url).place(x=100, y=10)

# Row 2: extraction condition and the preview button.
tkinter.Label(root, text="输入提取条件:").place(x=10, y=55)
tkinter.Entry(root, show=None, textvariable=var_body).place(x=100, y=55)
tkinter.Button(root, text="预览", command=take_body).place(x=250, y=50)

# Row 3: print to PDF ("是") or quit ("否").
tkinter.Label(root, text="是否打印:").place(x=10, y=100)
tkinter.Button(root, text="是", command=htmltopdf).place(x=100, y=95)
tkinter.Button(root, text="否", command=root.quit).place(x=130, y=95)

root.mainloop()
本文仅代表作者个人观点,不代表SEO研究协会网官方发声,对观点有疑义请先联系作者本人进行修改,若内容非法请联系平台管理员。更多相关资讯,请到SEO研究协会网www.seoxiehui;学习互联网营销技术,请到巨推学院www.jutuiedu。

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。