Python计算信息熵代码
#coding=utf-8
import pandas as pd
import numpy as np
import math
# Load the raw records from the CSV file.
data = pd.read_csv('useriddayu1.csv')
# NOTE(review): `.loc[()]` appears intended to select the whole frame (the
# shape notes below record identical shapes for `data` and `df`) -- confirm.
df=data.loc[()]
data.shape #(34250, 8)
df.shape #(34250, 8)
#df=df.head()
# Group the rows by user so later code can iterate (userid, sub-frame) pairs.
# The source text of this line was truncated to `upby(df['userid'])`; the
# assignment target is restored from the later uses of `grouped`.
grouped = df.groupby(df['userid'])
def getIndexSize(df):
    """Return the number of groups obtained by grouping *df* on ``userid``.

    Args:
        df: DataFrame that contains a ``userid`` column.

    Returns:
        The number of ``userid`` groups as an ``int`` (0 for an empty frame).
    """
    # The original iterated over the groups and incremented a counter;
    # `ngroups` reports the same count without the explicit loop.
    return int(df.groupby(df['userid']).ngroups)
# Holds the processed results: one row per userid group, same columns as df.
results = pd.DataFrame(index=range(getIndexSize(df)), columns=df.columns)
# NOTE(review): the original indentation was lost; the nesting below (and the
# placement of the two progress prints) is reconstructed -- confirm.
for ind, (user_id, pp) in enumerate(grouped):
    #print(user_id,"\n",pp)
    lenning = len(pp)
    for u in df.columns:
        print("===", u)
        if u == 'userid':
            # Single .loc assignment: the chained `results.iloc[ind][u] = ...`
            # form may write to a temporary copy and leave `results` unchanged.
            results.loc[ind, 'userid'] = user_id
            continue
        # Occurrence count of each distinct value of column u for this user.
        statics = pp[u].value_counts()
        entropy = 0.0
        for i in statics:
            # Convert before dividing so the ratio is never integer-truncated.
            p = float(i) / lenning
            # Entropy term -p*ln(p); math.log is the natural logarithm.
            entropy -= p * math.log(p)
        print("sum: ", entropy)
        results.loc[ind, u] = entropy
print("wowowowowowowow")
#print(results)
# Write the per-user entropy table, keeping the input column order.
results.to_csv('10.csv', columns=df.columns, index=False)
>python新手代码userid
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。
发表评论