#include <stdio.h>
#include <io.h>
#include <math.h>
#include <stdlib.h>
#define K 10            //kNN中选取最近邻居的个数
#define LINE 1024          //每个文件字符数
const char *to_search_train = "F:\\kNN\\train\\*.txt";        //train数据地址
const char *to_search_test = "F:\\kNN\\test\\*.txt";          //test数据地址
struct              //定义结构体 储存train数据和标签
{
int train_mat[2000][LINE];        //矩阵每一行都是1*LINE的矩阵     
int train_label[2000];        //储存每个数据的标签
}Train;
struct              //定义结构体 储存test数据和标签
{
int test_mat[2000][LINE];
int test_label[2000];
}Test;
float mat_dist[1000][2000];        //定义距离矩阵,储存每个test数据到每个train数据的距离
//定义子函数,功能:将每个test数据与所有train数据的距离进行排序,选取距离最小的前K个,这K个数据标签类型最多的,将此标签返回给主函数
int BubbleSort(float mat_dist_row[], int label[],int num_train)
int i,j,k;
int temp,temp_label;
int num[K] = {0};
int max = 0;
int label_final = 0;
for(i = 0;i < num_train;i++)  //冒泡排序,距离从小到大,同时将label对应跟随
for(j = i+1;j < num_train;j++) 
if(mat_dist_row[i] > mat_dist_row[j]) 
temp = mat_dist_row[i]; 
mat_dist_row[i] = mat_dist_row[j]; 
mat_dist_row[j] = temp;
temp_label = label[i]; 
label[i] = label[j]; 
label[j] = temp_label;
}
for (k = 0;k < K;k++)      //统计前K个数据,各种标签的个数
{
switch(label[k])
{
case 0: num[0]++;break;
case 1: num[1]++;break;
case 2: num[2]++;break;
case 3: num[3]++;break;
case 4: num[4]++;break;
case 5: num[5]++;break;
case 6: num[6]++;break;
case 7: num[7]++;break;
case 8: num[8]++;break;
case 9: num[9]++;break;
default: break;
}
}
max = num[0];
for(i = 0;i < K;i++)      //标签类型最多的,选择次标签 
if (num[i] > max )
{
max = num[i];
label_final = i;
}
}
return label_final;
}
int main()       
{   
FILE *fp;
int c;              //用于逐个读入字符数据
int train_i = 0,train_j = 0,test_i = 0,test_j = 0;  //用于循环
int count_train = 0,count_test = 0;      //用于统计train和test文件的个数
int i,j,k,l;            //用于循环
int sum = 0;            //距离求和
int update_label[2000];          //每次调用函数,更新label
int classifier;            //记录返回的标签类型
int count = 0;            //错误的个数
float rate;            //错误率
char str_adr[255];                                          //fopen函数读入文件名时地址
long handle;                                                //用于查的句柄 
struct _finddata_t fileinfo;        //文件信息的结构体 
handle = _findfirst(to_search_train,&fileinfo);    //第一次查
sprintf(str_adr, "F:\\kNN\\train\\%s", fileinfo.name);  //文件名赋给str_adr
if(-1 == handle)
{
printf("File not exit\n");
}
else
{
switch(fileinfo.name[0])        //给第一个文件赋予标签
{
case '0': ain_label[count_train] = 0;break;
case '1': ain_label[count_train] = 1;break;
case '2': ain_label[count_train] = 2;break;
case '3': ain_label[count_train] = 3;break;
case '4': ain_label[count_train] = 4;break;
case '5': ain_label[count_train] = 5;break;
case '6': ain_label[count_train] = 6;break;
case '7': ain_label[count_train] = 7;break;
case '8': ain_label[count_train] = 8;break;
case '9': ain_label[count_train] = 9;break;
default: break;
}
count_train++;
if((fp = fopen(str_adr,"r")) == NULL)
printf("Error!Can't open the file!\n");
else          //将文件中'0'和'1'字符转化为数字0,1,并储存
{
while((c = fgetc(fp)) != EOF)
{
if(c == '0' || c == '1')
{
train_j++;
}
}
}
fclose(fp);
while(!_findnext(handle,&fileinfo))              //循环查其他符合的文件,知道不到其他的为止 
{
train_j = 0;
train_i++;
sprintf(str_adr, "F:\\kNN\\train\\%s", fileinfo.name);
switch(fileinfo.name[0])      //给后面文件赋予标签
{
case '0': ain_label[count_train] = 0;break;
case '1': ain_label[count_train] = 1;break;
case '2': ain_label[count_train] = 2;break;
case '3': ain_label[count_train] = 3;break;
case '4': ain_label[count_train] = 4;break;
case '5': ain_label[count_train] = 5;break;
case '6': ain_label[count_train] = 6;break;
case '7': ain_label[count_train] = 7;break;
case '8': ain_label[count_train] = 8;break;
case '9': ain_label[count_train] = 9;break;
default: break;
}
if((fp = fopen(str_adr,"r")) == NULL)
printf("Error!Can't open the file!\n");
else          //将文件中'0'和'1'字符转化为数字0,1,并储存
{
while((c = fgetc(fp)) != EOF)
{
if(c == '0' || c == '1')
{
train_j++;
}
}
}
count_train++;
fclose(fp);
}
_findclose(handle);                                     
//下面重复上面文件读入和储存的过程,读入并储存所有test数据
handle = _findfirst(to_search_test,&fileinfo);       
sprintf(str_adr, "F:\\kNN\\test\\%s", fileinfo.name);
if(
-1 == handle)
{
printf("File not exit\n");
}
else
{
switch(fileinfo.name[0])
{
case '0': {st_label[count_test] = 0;break;}
case '1': {st_label[count_test] = 1;break;}
case '2': {st_label[count_test] = 2;break;}
case '3': {st_label[count_test] = 3;break;}
case '4': {st_label[count_test] = 4;break;}
case '5': {st_label[count_test] = 5;break;}
case '6': {st_label[count_test] = 6;break;}
case '7': {st_label[count_test] = 7;break;}
case '8': {st_label[count_test] = 8;break;}
case '9': {st_label[count_test] = 9;break;}
default: break;
}
count_test++;
if((fp = fopen(str_adr,"r")) == NULL)
printf("Error!Can't open the file!\n");
else
{
while((c = fgetc(fp)) != EOF)
{
if(c == '0' || c == '1')
{
test_j++;
}
}
}
fclose(fp);
c语言的冒泡排序算法
while(!_findnext(handle,&fileinfo))               
{
test_j = 0;
test_i++;
sprintf(str_adr, "F:\\kNN\\test\\%s", fileinfo.name);
switch(fileinfo.name[0])
{
case '0': {st_label[count_test] = 0;break;}
case '1': {st_label[count_test] = 1;break;}
case '2': {st_label[count_test] = 2;break;}
case '3': {st_label[count_test] = 3;break;}
case '4': {st_label[count_test] = 4;break;}
case '5': {st_label[count_test] = 5;break;}
case '6': {st_label[count_test] = 6;break;}
case '7': {st_label[count_test] = 7;break;}
case '8': {st_label[count_test] = 8;break;}
case '9': {st_label[count_test] = 9;break;}
default: break;
}
if((fp = fopen(str_adr,"r")) == NULL)
printf("Error!Can't open the file!\n");
else
{
while((c = fgetc(fp)) != EOF)
{
if(c == '0' || c == '1')
{
test_j++;
}
}
}
count_test++;
fclose(fp);
}
_findclose(handle);                                   
for (i = 0;i < count_test;i++)    //计算每个test(循环中的i)数据到每个train(循环中的j)数据的距离
{
for (j = 0;j < count_train;j++)
{
for (k = 0;k < LINE;k++) 
{
sum =sum + (st_mat[i][k]-ain_mat[j][k])*(st_mat[i][k]-ain_mat[j][k]);
}
mat_dist[i][j] = sqrt(sum);
sum = 0; 
}
for (l = 0;l < count_train;l++)    //更新train数据的label
{
update_label[l] = ain_label[l];
}
classifier = BubbleSort(mat_dist[i],update_label,count_train);//调用子函数,得到第i个test数据的标签
if (st_label[i] != classifier)      //统计错误个数
{
count++;           
}
printf("the real answer is:  %d,  the classififier is:  %d\n",st_label[i],classifier);//打印
}
rate = (float)count/count_test;          //计算
错误率
printf("the total number of errors is:  %d\n",count);  //打印
printf("the total error rate is:  %f\n",rate);
return 0;
}

版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系QQ:729038198,我们将在24小时内删除。