python制作词云图代码_Python基于WordCloud制作词云图
这篇文章主要介绍了Python基于WordCloud制作词云图的方法,文中通过示例代码介绍得非常详细,对大家的学习或者工作具有一定的参考学习价值,需要的朋友可以参考下。
1. 导入需要的包(package)
import matplotlib.pyplot as plt
from scipy.misc import imread
from wordcloud import WordCloud,STOPWORDS
import xlrd
2. 设置生成词云图的背景图片,最好是分辨率高且色彩边界分明的图片
def set_background(picpath):
    """Load the image that shapes the word cloud.

    Args:
        picpath: Path to the background image (PNG or another image format).

    Returns:
        The pixel data produced by ``imread``, intended to be passed on as
        the ``back_coloring`` argument of ``create_word_cloud``.
    """
    # NOTE(review): ``scipy.misc.imread`` was removed in SciPy 1.2; on newer
    # SciPy the file-level import must be switched to another reader
    # (e.g. ``imageio.imread``) before this function can run.
    back_coloring = imread(picpath)
    return back_coloring
3. 创建词云图:WordCloud
# NOTE(review): the font path in the source was truncated to ``C:\Windows\``
# (an unterminated raw string). SimHei is assumed as the default here because
# the text is Chinese -- confirm the intended font file, or pass ``font_path``.
def create_word_cloud(txt_str, back_coloring,
                      font_path=r'C:\Windows\Fonts\simhei.ttf'):
    """Render a word cloud from text, save it as a PNG, and display it.

    Args:
        txt_str: The text to visualise, as a single string.
        back_coloring: Image data used as the mask (the loaded image, not
            its file path).
        font_path: Font file used to draw the words; it must contain glyphs
            for the language of ``txt_str``.
    """
    print('---- 根据词频,开始⽣成词云! ----')
    wc = WordCloud(
        font_path=font_path,
        collocations=False,  # no two-word phrases, so words are not repeated
        stopwords=STOPWORDS,  # the library's default stop-word set
        max_words=100,
        # width/height are ignored by WordCloud whenever a mask is supplied.
        width=2000,
        height=1200,
        # background_color='white',
        mask=back_coloring,
    )
    wordcloud = wc.generate(txt_str)

    # Write the rendered image to disk (raw string: "\w" is not an escape).
    wc.to_file(r".\wordcloud_test.png")

    # Show the rendered image without axes.
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
4. 默认的停用词文件一般位于 wordcloud 的安装目录下:假如 Anaconda 安装在 D 盘,则路径为 D:\Anaconda3\Lib\site-packages\wordcloud\stopwords,其中都是英文词,例如:
注意:也可以先在 jieba 分词阶段使用自己的停用词表,得到去除停用词之后的文本字符串,再用它来绘制词云图:
5. 此时,词云图无法显示数字,这是因为 wc.generate 操作中有去除数字的语句(在 wordcloud.py 中,第560行左右)。想要显示数字,可以先注释掉这一行;较新版本的 wordcloud 也可以直接在创建 WordCloud 时传入 include_numbers=True,无需修改库源码。
6. 假设想要显示的词已经经过 jieba 分词,并保存在 txt 文档中,则绘制词云图的方法如下。
例如:txt 中每行是一个词:
则先读取 txt 文件,形成字符串格式的文本,再绘制:
if __name__ == '__main__':
    # Build the word cloud from a text file of already-segmented words.
    picpath = r".\xxx.png"  # path of the background image
    back_coloring = set_background(picpath)
    with open(r".\jieba_分词数据.txt", "r", encoding='utf-8') as f:
        remove_stop_str = f.read()
    create_word_cloud(remove_stop_str, back_coloring)
7. 如果通过 jieba 分词的数据已经处理成了(词, 词频)的形式并保存在 excel 中,例如这种两列格式的 excel 表(第一行是标签,如(词, 词频)),则可以先读取词频再显示。python 读取 excel 数据可以通过 xlrd.open_workbook 方法:
def read_from_xls(filepath, index_sheet):
    """Expand a (word, frequency) Excel sheet into a flat list of words.

    The first row is treated as a header and skipped. In every following
    row, column 0 holds the word and column 1 holds its frequency.

    Args:
        filepath: Path of the Excel workbook.
        index_sheet: Index of the sheet to read, as accepted by
            ``sheet_by_index``.

    Returns:
        A list in which each word is repeated as many times as its
        frequency.
    """
    # NOTE(review): the source had a dangling ``ding = "gbk"`` assignment
    # (apparently a truncated encoding setting). It had no effect and is
    # omitted here.
    # NOTE(review): xlrd 2.0+ reads only legacy .xls workbooks; an .xlsx
    # path needs xlrd < 2.0 or a different reader.
    rb = xlrd.open_workbook(filepath)
    print(rb)
    sheet = rb.sheet_by_index(index_sheet)

    data_tmp = []
    for row in range(1, sheet.nrows):  # start at 1 to skip the header row
        word = str(sheet.cell_value(row, 0))
        # Frequency is coerced to int so it can be used as a repeat count.
        count = int(sheet.cell_value(row, 1))
        data_tmp.extend([word] * count)
    return data_tmp
然后,读取数据并生成词云图:
if __name__ == '__main__':
    # Build the word cloud from a (word, frequency) Excel sheet.
    picpath = r".\xxx.png"
    back_coloring = set_background(picpath)
    data_dic = read_from_xls(r'D:\Python_workspace\spyder_space\jieba分词表.xlsx', 0)
    data_dic_str = '\n'.join(data_dic)  # join the word list into one string
    create_word_cloud(data_dic_str, back_coloring)
8. 总结代码
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 19 10:47:17 2019
@author: Administrator
"""
import matplotlib.pyplot as plt
from scipy.misc import imread
from wordcloud import WordCloud, STOPWORDS
import xlrd
def set_background(picpath):
    """Load the image that shapes the word cloud.

    Args:
        picpath: Path to the background image.

    Returns:
        The pixel data produced by ``imread``, intended to be passed on as
        the ``back_coloring`` argument of ``create_word_cloud``.
    """
    # NOTE(review): ``scipy.misc.imread`` was removed in SciPy 1.2; on newer
    # SciPy the file-level import must be switched to another reader
    # (e.g. ``imageio.imread``) before this function can run.
    back_coloring = imread(picpath)
    return back_coloring
# NOTE(review): the font path in the source was truncated to ``C:\Windows\``
# (an unterminated raw string). SimHei is assumed as the default here because
# the text is Chinese -- confirm the intended font file, or pass ``font_path``.
def create_word_cloud(txt_str, back_coloring,
                      font_path=r'C:\Windows\Fonts\simhei.ttf'):
    """Render a word cloud from text, save it as a PNG, and display it.

    Args:
        txt_str: The text to visualise, as a single string.
        back_coloring: Image data used as the mask (the loaded image, not
            its file path).
        font_path: Font file used to draw the words; it must contain glyphs
            for the language of ``txt_str``.
    """
    print('---- 根据词频,开始⽣成词云! ----')
    wc = WordCloud(
        font_path=font_path,
        collocations=False,  # no two-word phrases, so words are not repeated
        stopwords=STOPWORDS,
        max_words=100,
        # width/height are ignored by WordCloud whenever a mask is supplied.
        width=2000,
        height=1200,
        # background_color='white',
        mask=back_coloring,
    )
    wordcloud = wc.generate(txt_str)

    # Write the rendered image to disk (raw string: "\w" is not an escape).
    wc.to_file(r".\wordcloud_test.png")

    # Show the rendered image without axes.
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
def read_from_xls(filepath, index_sheet):
    """Expand a (word, frequency) Excel sheet into a flat list of words.

    The first row is treated as a header and skipped. In every following
    row, column 0 holds the word and column 1 holds its frequency.

    Args:
        filepath: Path of the Excel workbook.
        index_sheet: Index of the sheet to read, as accepted by
            ``sheet_by_index``.

    Returns:
        A list in which each word is repeated as many times as its
        frequency.
    """
    # NOTE(review): the source had a dangling ``ding = "gbk"`` assignment
    # (apparently a truncated encoding setting). It had no effect and is
    # omitted here.
    # NOTE(review): xlrd 2.0+ reads only legacy .xls workbooks; an .xlsx
    # path needs xlrd < 2.0 or a different reader.
    rb = xlrd.open_workbook(filepath)
    print(rb)
    sheet = rb.sheet_by_index(index_sheet)

    data_tmp = []
    for row in range(1, sheet.nrows):  # start at 1 to skip the header row
        word = str(sheet.cell_value(row, 0))
        # Frequency is coerced to int so it can be used as a repeat count.
        count = int(sheet.cell_value(row, 1))
        data_tmp.extend([word] * count)
    return data_tmp
if __name__ == '__main__':
    picpath = r".\xxx.png"
    back_coloring = set_background(picpath)

    # Input option 1: a (word, frequency) Excel sheet, joined into one string.
    data_dic = read_from_xls(r'D:\Python_workspace\spyder_space\jieba分词表.xlsx', 0)
    data_dic_str = '\n'.join(data_dic)

    # Input option 2 (disabled): read the text from a file instead.
    # with open(r".\jieba_分词数据.txt", "r", encoding='utf-8') as f:
    #     remove_stop_str = f.read()

    create_word_cloud(data_dic_str, back_coloring)
当然,绘制词云图的方法有很多,这只是其中的一种。
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持python博客。
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论