# 1. 商业数据分析及其工具
import pandas as pd

# Read the data from a local file.
df = pd.read_csv('/2glkx/data/al2-1.csv')

# Read the data from the web.  This rebinds df, so the remote table is the one
# explored below.  read_csv needs the complete URL, scheme and host included.
data_url = "https://raw.githubusercontent.com/alstat/Analysis-with-Programming/master/2014/Python/Numerical-Descriptions-of-the-Data/data.csv"
df = pd.read_csv(data_url)

print(df.head())
print(df.tail())

print(df.columns)
# Output recorded in the original notes:
# Index([u'Abra', u'Apayao', u'Benguet', u'Ifugao', u'Kalinga'], dtype='object')

# Extracting row names or the index
print(df.index)
# Output recorded in the original notes:
# RangeIndex(start=0, stop=79, step=1)

# Transpose data
print(df.T)

# DataFrame.ix has been removed from pandas; use .iloc for positions and .loc
# for labels instead.
print(df.iloc[:, 0].head())
# NOTE(review): on an integer index .ix sliced rows by label (end inclusive),
# so .loc keeps that meaning; the first three columns are picked by position.
print(df.loc[10:20, df.columns[0:3]])
# Drop the columns at positions 2 and 3 and show the first rows of what is left.
print(df.drop(df.columns[[2, 3]], axis=1).head())

print(df.describe())
from scipy import stats as ss

# Perform a one-sample t-test on the 'Abra' column using 15000 as the true mean.
# (DataFrame.ix has been removed from pandas; label-based selection is .loc.)
print(ss.ttest_1samp(a=df.loc[:, 'Abra'], popmean=15000))
# Output recorded in the original notes:
# Ttest_1sampResult(statistic=-1.1281738488299586, pvalue=0.26270472069109496)

# Same test with the whole frame passed as the sample.
print(ss.ttest_1samp(a=df, popmean=15000))
import matplotlib.pyplot as plt

# Box plot of the frame via pandas.  plt.show() does not take the plot as an
# argument, so draw first and show afterwards.
df.plot(kind='box')
plt.show()

import seaborn as sns

# Do the boxplot with seaborn; the frame is passed explicitly as `data` so it
# is not mistaken for a single positional variable.
sns.boxplot(data=df)
plt.show()
def add_2int(x, y):
    """Return the result of ``x + y``."""
    total = x + y
    return total


# Demo call: prints the sum of 2 and 2.
print(add_2int(2, 2))
# 2. Python商业数据的存取
import pandas as pd
import numpy as np

# Column names and the list of values stored under each of them.
a = ['apple', 'pear', 'watch', 'money']
b = [
    [1, 2, 3, 4, 5],
    [5, 7, 8, 9, 0],
    [1, 3, 5, 7, 9],
    [2, 4, 6, 8, 0],
]

# Pair every name in `a` with the list at the same position in `b`.
d = {name: values for name, values in zip(a, b)}
d  # bare expression: only echoes the dict in an interactive session

# Build the frame from the dictionary.
p = pd.DataFrame(data=d)
p  # bare expression: only echoes the frame in an interactive session

# Write the frame out as CSV, then load another CSV from the same folder
# (the loaded frame is not bound to a name).
p.to_csv('F:\\2glkx\\data\\IBM.csv')
pd.read_csv('F:\\2glkx\\data\\al2-1.csv')
import pandas as pd
import numpy as np

# Load the Excel workbook into a DataFrame and look at its first rows.
df = pd.read_excel('F:\\2glkx\\data\\al2-2.xls')
df.head()

# Read the data and create a data table named `data`.
# The call had been mangled to `pd.ad_excel(...))`; the function is pd.read_excel.
# NOTE(review): this path is on drive G: while the read above uses F: — confirm.
data = pd.read_excel('G:\\2glkx\\data\\al2-2.xls')
# View the first 5 rows of the table.
data.head()
import tushare as ts

# Fetch the history for stock code 000875 from the web.
df = ts.get_hist_data('000875')

# Save only the selected columns (calling to_csv without `columns` would save
# the whole frame instead).
df.to_csv(
    'F:/2glkx/data/000875.csv',
    columns=['open', 'high', 'low', 'close'],
)

import pandas as pd
import numpy as np

# Load the file written above and look at its first rows.
df = pd.read_csv('F:/2glkx/data/000875.csv')
df.head()
import tushare as ts
import os

# Collect the history of several stock codes into a single CSV file.
filename = 'F:/2glkx/data/bigfile.csv'
for code in ['000875', '600848', '000981']:
    df = ts.get_hist_data(code)
    if os.path.exists(filename):
        # The file already exists: append without repeating the header row.
        df.to_csv(filename, mode='a', header=False)
    else:
        # First write: create the file, header included.
        df.to_csv(filename)
import pandas as pd
import numpy as np

# Load the Excel workbook and look at its first rows.
df = pd.read_excel(io='F:/2glkx/data/000875.xls')
df.head()
import tushare as ts          # the tushare package must be installed first
# Install command: pip install tushare
import pandas as pd
import numpy as np

# Gather the 'close' column of several stocks into one DataFrame, one column
# per stock code.
data = pd.DataFrame()
for code in ['600000', '000980', '000981']:
    # NOTE(review): the original notes say the time range of this request
    # needs adjusting.
    closes = ts.get_hist_data(code)['close']
    # Reverse the row order (original note: dates running from oldest to newest).
    data[code] = closes[::-1]

data.info()            # inspect the data
data = data.dropna()   # discard rows that have a missing value
data.info()
data.head()
data.tail()

# Keep only two of the stock columns.
data = data[['600000', '000981']]
data.head()
# DataFrame.ix has been removed from pandas.
# NOTE(review): assumes the index is not integer-typed, in which case .ix
# sliced by position — hence .iloc here; confirm against the real index.
data.iloc[1:4]
data.iloc[:2, :3]
import tushare as ts
import pandas as pd

# Do not wrap wide frames across several lines when printing.
pd.set_option('expand_frame_repr', False)

# Set the API token (registration is required to obtain one).
ts.set_token('your token')
pro = ts.pro_api()

# Daily data for 000001.SZ between the two dates.
stock_data = pro.daily(
    ts_code='000001.SZ',
    start_date='20100101',
    end_date='20190101',
)
stock_data.head()
import pandas_datareader.data as web
import datetime

# Start of the requested period.
start = datetime.datetime(2017, 1, 1)
# End of the requested period.  The call had been mangled to `day()`; the note
# below says "up to today", i.e. datetime.date.today().
end = datetime.date.today()
# Fetch Zheda Wangxin (600797.SS) stock data from 2017-01-01 up to today.
stock = web.DataReader("600797.SS", "yahoo", start, end)
stock.head()  # first 5 rows of the DataFrame
import pandas as pd
from pandas import Series, DataFrame
import numpy as np
import matplotlib.pyplot as plt
from pandas_datareader import data, wb
from datetime import datetime

# Request window: from the same calendar day one year ago up to now.
end = datetime.now()
try:
    start = datetime(end.year - 1, end.month, end.day)
except ValueError:
    # 29 February has no counterpart in the previous year; fall back to the 28th.
    start = datetime(end.year - 1, end.month, 28)

df = data.DataReader('600797.SS', 'yahoo', start, end)

# Plot the 'Adj Close' column.
df['Adj Close'].plot(legend=True, figsize=(10, 4))
plt.show()
import numpy as np
import pandas as pd
import pandas_datareader.data as web
import datetime

# Fetch 600018.SS data from 2019-01-01 up to today.
df_csvsave = web.DataReader(
    "600018.SS",
    "yahoo",
    datetime.datetime(2019, 1, 1),
    datetime.date.today(),
)
print(df_csvsave)
# Save all columns, together with the index, to a CSV file.
df_csvsave.to_csv(r'F:\2glkx\data\600018.csv', columns=df_csvsave.columns, index=True)
# NOTE(review): the scrape fused unrelated page text ("通达信数字转字符串函数")
# onto the import below; it is kept here as a comment so the line parses.
import tushare as ts
import pandas as pd
pd.set_option('expand_frame_repr', False)  # do not wrap wide frames when printing

# 版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。