#include<stdlib.h>
#include<stdio.h>
#include<string.h>
#include<math.h>
#include<time.h>
#include<cutil.h>
// Tile edge used by matrixAdd's index arithmetic: block indices in both x and y
// are scaled by this value, so the kernel presumably expects to be launched
// with BLOCK_SIZE x BLOCK_SIZE threads per block (launch site not visible here).
#define BLOCK_SIZE 16
// Matrix dimensions, each 200 * BLOCK_SIZE = 3200; main multiplies them
// (Width * Height) to get the element count used to size the host buffers.
#define Width (200*BLOCK_SIZE)
#define Height (200*BLOCK_SIZE)
/*
 * Element-wise addition kernel: C[i] = A[i] + B[i].
 *
 * The matrices are addressed as contiguous rows with a row stride of `width`
 * elements (linear index = row * width + col).
 *
 * Launch layout: 2D grid of 2D blocks. The index arithmetic scales block
 * indices by BLOCK_SIZE, so it assumes blockDim.x == blockDim.y == BLOCK_SIZE.
 * Each thread handles exactly one element.
 *
 * Parameters:
 *   C      - output buffer, at least width * height floats
 *   A, B   - input buffers, at least width * height floats each
 *   width  - number of elements per row
 *   height - number of rows
 *
 * Threads whose (row, col) falls outside width x height do nothing, so the
 * grid may be rounded up when the dimensions are not multiples of BLOCK_SIZE.
 */
__global__ void matrixAdd(float*C,float*A,float*B,int width,int height)
{
    const int col = BLOCK_SIZE * blockIdx.x + threadIdx.x;
    const int row = BLOCK_SIZE * blockIdx.y + threadIdx.y;

    // Guard the grid tail: without this, surplus threads would touch memory
    // past the end of the buffers or wrap around into the following row.
    if (col >= width || row >= height) {
        return;
    }

    // Widen before multiplying so row * width cannot overflow a 32-bit int.
    const size_t index = (size_t)row * (size_t)width + (size_t)col;
    C[index] = A[index] + B[index];
}
int main(int argc,char**argv)
{
CUT_DEVICE_INIT(argc,argv);
CUDA_SAFE_CALL(cudaSetDevice(1));
srand(2006);
int sizeMat=Width*Height;
int memsizeMat=sizeof(float)*sizeMat;
float*h_A=(float*)malloc(memsizeMat);
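/*
 * [Source truncated by the hosting site; the remainder of main is not shown.]
 * Original notice (translated):
 *   "The preview of this content has ended; log in to read more."
 *   "The full content can be read after downloading; 2 pages remain unread. Download now."
 *   - 1
 *   - 2
 *   "Go to page"
 */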