// Host-side fragment: allocate three device buffers, upload the two inputs,
// launch fmultiply, and download the result. Code marked "..." is elided in
// this snippet (presumably the declarations of a_h/b_h/c_h and a_d/b_d/c_d).
int N = 10;                       // Array containing a maximum of 10 elements
size_t size = N * sizeof(float);  // Size of each buffer in bytes
...
// cudaMalloc writes the device address through its first argument, so it
// needs the ADDRESS of each device pointer, cast to void**.
cudaMalloc((void **)&a_d, size);
cudaMalloc((void **)&b_d, size);
cudaMalloc((void **)&c_d, size);
...
...
// Upload both inputs: cudaMemcpy(dst, src, bytes, direction).
cudaMemcpy(a_d, a_h, size, cudaMemcpyHostToDevice);
cudaMemcpy(b_d, b_h, size, cudaMemcpyHostToDevice);

// Launch configuration. fmultiply is not given an element count, so it
// cannot guard against out-of-range threads; launch exactly N threads in
// total, one per element. N = 10 fits comfortably in a single block.
// NOTE(review): assumes the kernel handles one element per thread -- the
// kernel body is not visible here; confirm. For larger N, pass N to the
// kernel, add an `if (i < N)` guard, and use e.g. 256 threads per block with
// noOfBlocks = (N + threadsPerBlock - 1) / threadsPerBlock.
int threadsPerBlock = N;
int noOfBlocks = 1;

// The execution configuration order is <<<number of blocks, threads per block>>>.
fmultiply<<<noOfBlocks, threadsPerBlock>>>(a_d, b_d, c_d);

// Download the result: the destination is the host buffer and the source is
// the device buffer. This blocking copy follows the kernel on the default stream.
cudaMemcpy(c_h, c_d, size, cudaMemcpyDeviceToHost);
...
...
var
This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)