Python 尝试随机生成测试集和训练集
尝试随机生成测试集和训练集
通常在做数据挖掘或者训练模型时,需要随机按比例将数据集划分为训练集和测试集。这里我自己写了一段划分的代码,另外还有一份是使用sklearn中的一个函数直接完成划分的(emmmmm,感谢python各种库)
import pandas as pd
import numpy as np
import math
import random
def split_train_test(frame, n_test, random_state=None):
    """Randomly move ``n_test`` rows of ``frame`` into a separate test frame.

    Rows are drawn without replacement from the whole frame and removed from
    the training part by index label, so the sampled rows and the dropped
    rows are always the same rows.

    Args:
        frame: DataFrame to split. Index labels are assumed to be unique
            (true for the default index produced by ``pd.read_csv``).
        n_test: Number of rows to move into the test set; clamped to the
            range ``[0, len(frame)]``.
        random_state: Optional seed forwarded to ``DataFrame.sample`` for a
            reproducible split.

    Returns:
        A ``(train, test)`` tuple. ``train`` keeps the original index labels;
        ``test`` is re-indexed from 0.
    """
    n_test = max(0, min(int(n_test), len(frame)))
    test = frame.sample(n=n_test, random_state=random_state)
    # Drop by label: ``test.index`` holds the labels of the sampled rows.
    train = frame.drop(index=test.index)
    return train, test.reset_index(drop=True)


# NOTE: the paths use a Windows-style separator, as in the original script.
path1 = r'data files\ratings.csv'
with open(path1) as csvpath1:
    ratings_df = pd.read_csv(csvpath1)

path2 = r'data files\movies.csv'
with open(path2, encoding='UTF-8') as csvpath2:
    movies_df = pd.read_csv(csvpath2)

# Number of rating rows before the split.
print(len(ratings_df))

# Size of the test set. For a proportional split (e.g. 20 %) use
# ``int(len(ratings_df) * 0.2)`` instead of the fixed count.
line = 30

# ratings_df becomes the training set, df the test set.
ratings_df, df = split_train_test(ratings_df, line)

print(ratings_df.head())
print(df.head())

# Number of rating rows left in the training set.
print(len(ratings_df))

# NOTE(review): this discards the first two test rows, which then belong to
# neither set. Kept from the original script; confirm it is intentional.
df.drop([0, 1], axis=0, inplace=True)
python新手代码useriddf
补充一下:看到别人用sklearn的一个函数实现的方法
from numpy import random
import numpy as np
# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module was removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split

# Number of toy samples to generate.
num = 10

# Random integer-valued features (num x 2) and targets (num x 1) in [0, 9].
x = np.floor(10 * np.random.rand(num, 2))
y = np.floor(10 * np.random.rand(num, 1))

# Hold out 40 % of the samples as the test set; the fixed random_state makes
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.4, random_state=0
)

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。