基于Python的电商行业数据分析 导入库
python新手代码userid
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import datetime
读取数据
# Load the order data; every later step reads from and adds columns to `df`.
df = pd.read_csv('gss.csv')
计算指标
# Number of rows (orders) per customer, as a one-column frame named 'number'.
buy_num = pd.DataFrame(df.groupby('Customer ID').size())
buy_num.columns = ['number']
# Average number of orders per customer, rounded to 1 decimal place.
print(round(np.mean(buy_num['number']), 1))

# Amount paid per row: Sales scaled by (1 - Discount).
df['realcost'] = df['Sales'] * (1 - df['Discount'])
# Total amount paid per customer. Selecting 'realcost' before summing avoids
# aggregating every other column of the frame.
buy_cost = pd.DataFrame(df.groupby('Customer ID')['realcost'].sum())
buy_cost.columns = ['cost']
# Average amount paid per customer, rounded to 2 decimal places.
print(round(np.mean(buy_cost['cost']), 2))
# Each ranking selects the column of interest *before* aggregating, so only
# that column is summed rather than every column of `df`.

# Total profit per product category, highest first.
cae_prof = df.groupby('Category')['Profit'].sum().to_frame()
cae_prof = cae_prof.sort_values(by=['Profit'], ascending=False)

# Total profit per (category, sub-category), sorted descending by category
# and then by profit within each category.
subcat_prof = df.groupby(['Category', 'Sub-Category'])['Profit'].sum().to_frame()
subcat_prof = subcat_prof.sort_values(by=['Category', 'Profit'], ascending=False)

# Total amount paid ('realcost') per market, highest first.
mar_sal = df.groupby('Market')['realcost'].sum().to_frame()
mar_sal = mar_sal.sort_values(by=['realcost'], ascending=False)

# Total profit per market, highest first.
mar_prof = df.groupby('Market')['Profit'].sum().to_frame()
mar_prof = mar_prof.sort_values(by=['Profit'], ascending=False)
# Parse the order dates, which are stored as 'YYYY/MM/DD' strings.
order_date = df['Order Date']
date_time = [time.strptime(raw, '%Y/%m/%d') for raw in order_date]

# Year as a 4-digit string (e.g. '2011'); later filters compare against strings.
year = pd.DataFrame(
    {'year': [time.strftime('%Y', parsed) for parsed in date_time]},
    index=df.index,
)

# Month as a zero-padded 2-digit string (e.g. '01').
month = pd.DataFrame(
    {'month': [time.strftime('%m', parsed) for parsed in date_time]},
    index=df.index,
)

# The helper frames share df's index, so these assignments align row by row.
df['year'] = year['year']
df['month'] = month['month']
def _distinct_buyers(frame, period, label):
    """Count distinct customers per value of `period` in `frame`.

    Returns a one-column DataFrame named `label`, indexed by `period`.
    """
    # One entry per (period, customer) pair, then count the pairs per period.
    # groupby(level=...).count() replaces Series.count(level=...), which was
    # removed in pandas 2.0.
    pairs = frame.groupby([period, 'Customer ID']).size()
    counts = pd.DataFrame(pairs.groupby(level=period).count())
    counts.columns = [label]
    return counts


# Number of distinct buyers per year.
year_customer = _distinct_buyers(df, 'year', 'number')

# Number of distinct buyers per month, for each year separately.
df2011 = df[df['year'] == '2011']
df2012 = df[df['year'] == '2012']
df2013 = df[df['year'] == '2013']
df2014 = df[df['year'] == '2014']
# NOTE(review): the customer key is 'Customer ID' (with a space) everywhere
# else in this script, so that spelling is used here too.
number11 = _distinct_buyers(df2011, 'month', 'number11')
number12 = _distinct_buyers(df2012, 'month', 'number12')
number13 = _distinct_buyers(df2013, 'month', 'number13')
number14 = _distinct_buyers(df2014, 'month', 'number14')

# Side-by-side monthly table, one column per year. The first assignment sets
# the table's index (the 2011 months); later columns are aligned to it.
table = pd.DataFrame()
table['2011'] = number11['number11']
table['2012'] = number12['number12']
table['2013'] = number13['number13']
table['2014'] = number14['number14']
print(number11.index)
# Earliest order year per customer.
user_min = df.groupby('Customer ID')['year'].min().to_frame()
user_min = user_min.reset_index()
# Customers whose earliest order year is 2011.
userid = user_min[user_min['year'] == '2011']
# Distinct customers who ordered in 2012.
user = pd.DataFrame(df[df['year'] == '2012']['Customer ID'].unique())
# Name the key column so the merge below has a common column to join on.
user.columns = ['Customer ID']
# merge joins on the shared column ('Customer ID') with an inner join, which
# keeps the 2011 first-time customers who ordered again in 2012.
retained_2012 = pd.merge(userid, user)

# Monthly churn: the same comparison for January vs. February.
# NOTE(review): assumes the month comparison is restricted to 2014 (df2014),
# matching the February filter below - confirm.
user_month = df2014.groupby('Customer ID')['month'].min().to_frame()
user_month = user_month.reset_index()
# Customers whose earliest order month within that frame is January.
userm = user_month[user_month['month'] == '01']
# Distinct customers who ordered in February 2014.
userm__ = pd.DataFrame(df2014[df2014['month'] == '02']['Customer ID'].unique())
userm__.columns = ['Customer ID']
# Inner join: January customers who also ordered in February.
retained_feb_2014 = pd.merge(userm, userm__)
_years = ['2011', '2012', '2013', '2014']

# Average basket size per year: mean 'realcost' over the rows of each year.
# Selecting the column first keeps .mean() away from non-numeric columns.
year_avg_cost = df.groupby('year')['realcost'].mean().to_frame()

# Number of orders per customer per year; a customer with no rows in a year
# gets 0 for that year.
user_year_num = pd.DataFrame(
    df.groupby(['Customer ID', 'year']).size().unstack(fill_value=0)
)
# Mean orders per customer for each year. The second entry previously read
# the '2011' column again; it now uses '2012'.
avg_num = [np.mean(user_year_num[y]) for y in _years]

# Total profit per customer per year (0 where the customer has no rows).
cou_year_pro = (
    df.groupby(['Customer ID', 'year'])['Profit'].sum().unstack(fill_value=0)
)
# Average profit per customer for each year: one row per year, one column
# named 'avg_pro'.
avg_pro = pd.DataFrame(
    {'avg_pro': [np.mean(cou_year_pro[y]) for y in _years]},
    index=_years,
)
# Delivery lead time: ship date minus order date for every row.
# Both columns hold 'YYYY/MM/DD' strings.
order_date1 = df['Order Date']
order_time = [time.strptime(raw, '%Y/%m/%d') for raw in order_date1]

send_date = df['Ship Date']
send_time = [time.strptime(raw, '%Y/%m/%d') for raw in send_date]

# The first three fields of a struct_time are (year, month, day); build
# datetimes from them so the subtraction yields a timedelta per row.
result = [
    datetime.datetime(*shipped[:3]) - datetime.datetime(*ordered[:3])
    for ordered, shipped in zip(order_time, send_time)
]

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。