/********************************************************************
* CUDAWin32App.cu
* This is an example of a CUDA program.
*********************************************************************/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
// Two additional headers added by the author
#include <time.h>
#include <cutil_inline.h>
// Buffers allocated by main() with malloc (the two inputs and the result).
float* h_A;
float* h_B;
float* h_C;
// Buffers allocated by main() with cudaMalloc (the two inputs and the result).
float* d_A;
float* d_B;
float* d_C;
// Element-wise product of two vectors: C[k] = A[k] * B[k] for 0 <= k < N.
//   A, B : input vectors (read only)
//   C    : output vector
//   N    : number of elements to process
// Each thread handles at most one element, selected by its global index along
// the x dimension; threads whose index is N or more do nothing.
__global__ void DotMulVet(const float* A,const float* B,float* C,int N)
{
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= N)
        return;  // surplus thread in the last block
    C[idx] = A[idx] * B[idx];
}
// Program entry point.
// Fills two host vectors with random values in [0, 1], multiplies them
// element-wise on the GPU with DotMulVet, times the kernel with CUDA events,
// compares the GPU result against a CPU reference, prints the verdict and the
// elapsed time, waits for a key press and releases all resources.
// Returns 0 on normal completion, EXIT_FAILURE if host allocation fails.
int main()
{
    const int N = 1000;                          // elements per vector
    const size_t mem_size = sizeof(float) * N;   // bytes per vector
    int i;

    cudaEvent_t start, finish;
    cutilSafeCall(cudaEventCreate(&start));
    cutilSafeCall(cudaEventCreate(&finish));

    printf("Start build\n");
    h_A = (float*)malloc(mem_size);
    h_B = (float*)malloc(mem_size);
    h_C = (float*)malloc(mem_size);
    if (h_A == NULL || h_B == NULL || h_C == NULL)
    {
        fprintf(stderr, "Host memory allocation failed\n");
        free(h_A);
        free(h_B);
        free(h_C);
        cudaEventDestroy(start);
        cudaEventDestroy(finish);
        return EXIT_FAILURE;
    }
    for (i = 0; i < N; i++)
    {
        // Convert before dividing: the integer expression rand()/RAND_MAX is 0
        // for every value except RAND_MAX, which made all inputs zero and the
        // result check meaningless.
        h_A[i] = (float)rand() / (float)RAND_MAX;
        h_B[i] = (float)rand() / (float)RAND_MAX;
    }

    cutilSafeCall(cudaMalloc((void**)&d_A, mem_size));
    cutilSafeCall(cudaMalloc((void**)&d_B, mem_size));
    cutilSafeCall(cudaMalloc((void**)&d_C, mem_size));
    cutilSafeCall(cudaMemcpy(d_A, h_A, mem_size, cudaMemcpyHostToDevice));
    cutilSafeCall(cudaMemcpy(d_B, h_B, mem_size, cudaMemcpyHostToDevice));

    printf("Start compute\n");
    const int blockMax = 256;                            // threads per block, a multiple of the 32-thread warp
    const int blockNum = (N + blockMax - 1) / blockMax;  // ceil(N / blockMax)

    cutilSafeCall(cudaEventRecord(start, 0));
    // The optional third launch parameter (Ns, dynamic shared memory size) is
    // omitted because the kernel uses no shared memory.
    DotMulVet<<<blockNum, blockMax>>>(d_A, d_B, d_C, N);
    cutilCheckMsg("kernel launch failure");
    cutilSafeCall(cudaEventRecord(finish, 0));
    // Block until the finish event, recorded after the kernel, has completed.
    cutilSafeCall(cudaEventSynchronize(finish));

    cutilSafeCall(cudaMemcpy(h_C, d_C, mem_size, cudaMemcpyDeviceToHost));

    // Compare against a CPU reference; i stops at the first mismatch.
    for (i = 0; i < N; i++)
    {
        const float ans = h_A[i] * h_B[i];
        if (fabsf(ans - h_C[i]) > 1e-6f)
            break;
    }
    printf("Result: %s\n", (i == N) ? "Correct" : "Wrong");

    float costTime;  // elapsed time between the two events, in milliseconds
    cutilSafeCall(cudaEventElapsedTime(&costTime, start, finish));
    printf("Cost Time : %f\n", costTime);

    getchar();  // keep the console window open until a key is pressed

    free(h_A);
    free(h_B);
    free(h_C);
    cutilSafeCall(cudaFree(d_A));
    cutilSafeCall(cudaFree(d_B));
    cutilSafeCall(cudaFree(d_C));
    cutilSafeCall(cudaEventDestroy(start));
    cutilSafeCall(cudaEventDestroy(finish));
    return 0;
}
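// Copyright notice (from the hosting website): all content on this site comes from the Internet and is for demonstration purposes only; do not use it for commercial or other illegal purposes. If your rights have been infringed, please contact us via QQ: 729038198 and we will remove the content within 24 hours.
// Post a comment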