How do I run this CUDA program?

Question

2.33/5 (2 votes)

See more:

C#

#include <stdio.h>
#include <cuda.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>
#include <math.h>
//#include<conio.h>
// Fixed dimension of the file-scope work arrays declared below.
#define N 100
// File-scope N x N float matrix.
// NOTE(review): the functions in this file that use a matrix take their own
// parameter named A, which shadows this array — it looks unused; confirm.
float A[N][N];
// File-scope scratch integers (loop counters and an input index).
// NOTE(review): sharing loop counters at file scope makes any function that
// uses them non-reentrant — prefer locals.
int i,j,it,n;
// Elapsed wall-clock time in seconds, as reported at the end of main().
float t_1;
// File-scope scratch vectors of length N plus two scalar maxima.
// NOTE(review): presumably intended for the host-side power iteration
// (iterate, product, per-element change and their maxima) — confirm.
float x[N],z[N],e[N],zmax,emax;
// Modulus applied to rand() when main() fills the input matrix.
#define MAX_RANGE 9999

/*
 * funcCheck(stmt)
 *
 * Evaluates the CUDA runtime call `stmt` exactly once. If the result is not
 * cudaSuccess, prints the source line and the CUDA error string to stdout and
 * executes `return -1;` in the CALLING function, so it may only be used inside
 * functions whose return type accepts -1.
 *
 * The argument is parenthesized and the temporary has a macro-private name so
 * that calls such as funcCheck(err) do not expand to the self-initialization
 * `cudaError_t err = err;`.
 */
#define funcCheck(stmt) do {                                                    \
        cudaError_t funcCheckErr_ = (stmt);                                   \
        if (funcCheckErr_ != cudaSuccess) {                                   \
            printf( "Failed to run stmt %d ", __LINE__);                       \
            printf( "Got CUDA error ...  %s ", cudaGetErrorString(funcCheckErr_));    \
            return -1;                                                        \
        }                                                                     \
    } while(0)

// Tiled row-sum kernel.
//
// Each thread accumulates the sum of all elements of row `Row` of the
// row-major matrix A (numARows x numAColumns), staging A through a 32x32
// shared-memory tile, and stores that sum to C[Row][Col] of the row-major
// output C (numCRows x numCColumns).
// NOTE(review): the kernel name suggests an eigenvalue computation, but the
// arithmetic below is only a row sum — confirm the intended algorithm.
//
// Launch requirements:
//   - blockDim must be exactly (32, 32, 1); the tile size is fixed at 32.
//   - The grid must cover numCColumns in x and numCRows in y.
//   - Static shared memory: 32*32 floats (4 KB) per block.
//
// Parameters:
//   A            device pointer, numARows * numAColumns floats (read only)
//   C            device pointer, numCRows * numCColumns floats (written)
//   numARows     rows of A
//   numAColumns  columns of A
//   numCRows     rows of C
//   numCColumns  columns of C
__global__ void eignvShared(float * A, float * C, 
                                    int numARows, int numAColumns,int numCRows, int numCColumns) 
{
    const int TILE = 32;
    __shared__ float sA[TILE][TILE];   // Tile size of 32x32

    const int Row = blockDim.y*blockIdx.y + threadIdx.y;
    const int Col = blockDim.x*blockIdx.x + threadIdx.x;
    const int numTiles = (numAColumns + TILE - 1) / TILE;   // ceil-div
    float Cvalue = 0.0f;

    // The trip count is uniform across the block, so every thread reaches
    // both barriers inside the loop.
    for (int k = 0; k < numTiles; k++)
    {
        const int aCol = k*TILE + threadIdx.x;

        // Out-of-range cells are zero-filled so they do not affect the sum.
        if ((Row < numARows) && (aCol < numAColumns))
        {
            sA[threadIdx.y][threadIdx.x] = A[(Row*numAColumns) + aCol];
        }
        else
        {
            sA[threadIdx.y][threadIdx.x] = 0.0f;
        }
        __syncthreads();   // tile fully written before anyone reads it

        for (int j = 0; j < TILE; ++j)
        {
            Cvalue += sA[threadIdx.y][j];
        }
        __syncthreads();   // tile fully consumed before the next load overwrites it
    }

    // Only threads that map to a real element of C store, so the write stays
    // inside the numCRows x numCColumns buffer and each element has one writer.
    if ((Row < numCRows) && (Col < numCColumns))
    {
        C[(Row*numCColumns) + Col] = Cvalue;
    }
}

/*
 * Host-side power iteration on a square, row-major matrix.
 *
 * Prompts on stdout and reads a 0-based column index from stdin; that column
 * of A is the starting vector. For a fixed 100 iterations the vector is
 * multiplied by A and rescaled by the largest absolute component of the
 * product. The final vector is copied to C.
 *
 * Parameters:
 *   A            numARows * numAColumns floats, row-major (read only)
 *   C            output buffer of numCRows * numCColumns floats; receives the
 *                first min(numARows, numCRows*numCColumns) components
 *   numARows     rows of A; must be > 0 and equal to numAColumns
 *   numAColumns  columns of A
 *   numCRows     rows of C
 *   numCColumns  columns of C
 *
 * On invalid arguments, unreadable/out-of-range input or allocation failure a
 * message is written to stderr and C is left untouched. If A*x becomes the
 * zero vector the iteration stops early and the last non-degenerate vector is
 * returned (avoids dividing by zero).
 */
void eignvOnHost(float * A, float * C,  int numARows,
                        int numAColumns,int numCRows, int numCColumns)
{
    const int maxIterations = 100;
    const int dim = numARows;
    int startCol = 0;

    if (A == NULL || C == NULL || numARows <= 0 || numARows != numAColumns)
    {
        fprintf(stderr, "eignvOnHost: A must be a non-empty square matrix\n");
        return;
    }

    printf("\nEnter the column vector\n");
    if (scanf("%d", &startCol) != 1 || startCol < 0 || startCol >= numAColumns)
    {
        fprintf(stderr, "eignvOnHost: column index must be in [0, %d)\n", numAColumns);
        return;
    }

    /* Work vectors are sized from the matrix, not from a compile-time constant. */
    float *x = (float *) malloc(sizeof(float) * (size_t) dim);
    float *z = (float *) malloc(sizeof(float) * (size_t) dim);
    if (x == NULL || z == NULL)
    {
        fprintf(stderr, "eignvOnHost: out of memory\n");
        free(x);
        free(z);
        return;
    }

    /* Starting vector: the selected column of A (A is a flat row-major array). */
    for (int row = 0; row < dim; row++)
    {
        x[row] = A[(size_t) row * numAColumns + startCol];
    }

    for (int it = 0; it < maxIterations; it++)
    {
        float zmax = 0.0f;

        /* z = A * x, tracking the largest absolute component as we go. */
        for (int row = 0; row < dim; row++)
        {
            const float *aRow = A + (size_t) row * numAColumns;
            float sum = 0.0f;
            for (int col = 0; col < dim; col++)
            {
                sum += aRow[col] * x[col];
            }
            z[row] = sum;
            if (fabsf(sum) > zmax)
            {
                zmax = fabsf(sum);
            }
        }

        /* A*x == 0: nothing to normalize by, keep the previous iterate. */
        if (zmax == 0.0f)
        {
            break;
        }

        for (int row = 0; row < dim; row++)
        {
            x[row] = z[row] / zmax;
        }
    }

    /* Copy out no more than the caller's buffer can hold. */
    int outCount = 0;
    if (numCRows > 0 && numCColumns > 0)
    {
        outCount = numCRows * numCColumns;
    }
    if (outCount > dim)
    {
        outCount = dim;
    }
    for (int k = 0; k < outCount; k++)
    {
        C[k] = x[k];
    }

    free(x);
    free(z);
    return;
}

/*
 * Driver: fills a 512x512 matrix with pseudo-random values, runs the
 * eignvShared kernel on the GPU, runs eignvOnHost on the CPU, compares the two
 * output buffers element-wise (reporting only the first mismatch) and prints
 * the elapsed wall-clock time in whole seconds.
 *
 * Returns 0 on success and -1 if a host allocation or any CUDA call fails.
 * NOTE: funcCheck returns from main immediately on a CUDA error, so buffers
 * are not released on that path; the process exits right after.
 */
int main(int argc, char ** argv) {
    float * hostA = NULL;          // The A matrix
    float * hostC = NULL;          // The output C copied back from the device
    float * hostComputedC = NULL;  // The output C computed on the host
    float * deviceA = NULL;
    float * deviceC = NULL;
    time_t startTime, endTime;     // time() yields time_t, not clock_t

    const int tile = 32;           // must match the tile size used by eignvShared
    // Please adjust rows and columns according to your need.
    int numARows = 512;            // number of rows in the matrix A
    int numAColumns = 512;         // number of columns in the matrix A
    int numCRows = 1;              // number of rows in the matrix C
    int numCColumns = numAColumns; // number of columns in the matrix C

    const size_t countA = (size_t) numARows * (size_t) numAColumns;
    const size_t countC = (size_t) numCRows * (size_t) numCColumns;
    const size_t bytesA = sizeof(float) * countA;
    const size_t bytesC = sizeof(float) * countC;

    startTime = time(NULL); // time measure: start

    hostA = (float *) malloc(bytesA);
    hostC = (float *) malloc(bytesC);
    // Zero-initialized so the comparison below never reads indeterminate
    // memory, even if the host routine bails out without writing its output.
    hostComputedC = (float *) calloc(countC, sizeof(float));
    if (hostA == NULL || hostC == NULL || hostComputedC == NULL)
    {
        printf("Failed to allocate host memory\n");
        free(hostA);
        free(hostC);
        free(hostComputedC);
        return -1;
    }

    for (size_t idx = 0; idx < countA; idx++)
    {
        hostA[idx] = (rand() % MAX_RANGE) / 2.0f;
    }

    // Allocating GPU memory
    funcCheck(cudaMalloc((void **)&deviceA, bytesA));
    funcCheck(cudaMalloc((void **)&deviceC, bytesC));

    // Copy memory to the GPU
    funcCheck(cudaMemcpy(deviceA, hostA, bytesA, cudaMemcpyHostToDevice));

    // Initialize the grid and block dimensions (ceil-div: no surplus block
    // when the extent is an exact multiple of the tile size).
    dim3 dimBlock(tile, tile, 1);
    dim3 dimGrid((numCColumns + tile - 1) / tile, (numCRows + tile - 1) / tile, 1);

    // Launch the GPU kernel
    eignvShared<<<dimGrid, dimBlock>>>(deviceA, deviceC, numARows, numAColumns, numCRows, numCColumns);

    // A launch does not return a status: bad launch configurations surface
    // through cudaGetLastError(), in-kernel faults at the next synchronization.
    funcCheck(cudaGetLastError());
    funcCheck(cudaDeviceSynchronize());

    // Copy the results in GPU memory back to the CPU
    funcCheck(cudaMemcpy(hostC, deviceC, bytesC, cudaMemcpyDeviceToHost));

    eignvOnHost(hostA,  hostComputedC, numARows, numAColumns,numCRows, numCColumns);

    // Bit-exact float equality between host and device is not expected, so
    // compare with a mixed absolute/relative tolerance. The negated form also
    // reports NaN as a mismatch.
    for (size_t idx = 0; idx < countC; idx++)
    {
        const float diff = fabsf(hostComputedC[idx] - hostC[idx]);
        const float tol = 1e-4f + 1e-4f * fabsf(hostC[idx]);
        if (!(diff <= tol))
        {
            printf("Mismatch at  Col = %d hostComputed[] = %f --device[] %f\n",  (int) (idx % (size_t) numCColumns), hostComputedC[idx], hostC[idx]);
            break;
        }
    }

    // Free the GPU memory
    funcCheck(cudaFree(deviceA));
    funcCheck(cudaFree(deviceC));

    free(hostA);
    free(hostC);
    free(hostComputedC);

    endTime = time(NULL);
    t_1 = (float) difftime(endTime, startTime);
    printf("Execution time: %f \n",t_1);
    return 0;
}

What I have tried:

How can I run this CUDA program?
I have Ubuntu Linux
and Visual Studio 2010.
It computes the eigenvalue and eigenvector.
Please help me.

Posted 27-Jun-16 16:41pm

Member 12606956

Updated 28-Jun-16 4:11am

Add a Solution

Comments

Sergey Alexandrovich Kryukov 27-Jun-16 23:04pm

How to run? — off-topic. This is a site for software developers, engineers.
—SA

Patrice T 28-Jun-16 0:08am

No tutorial for Cuda ? No example ?

Richard MacCutchan 28-Jun-16 3:59am

You need to build it on your Linux system.

chandanadhikari 28-Jun-16 9:38am

This is not a proper way of asking questions here. Please be more precise about the problem you are facing when you run this program: what error message you get, and so on. This information will help us help you better!

Member 12606956 29-Jun-16 16:47pm

When I run it, this error appears: "expression must have pointer to object type" ...

chandanadhikari 30-Jun-16 2:32am

The message already gives some explanation ... you need to find out which line number this error message is pointing to and correct the error there.

Add your solution here

Treat my content as plain text, not as HTML

Preview 0

…

Existing Members

Sign in to your account

...or Join us

Download, Vote, Comment, Publish.

Your Email
Password
Forgot your password?

Your Email
This email is in use. Do you need your password?
Optional Password

I have read and agree to the Terms of Service and Privacy Policy
Please subscribe me to the CodeProject newsletters

When answering a question please:

Read the question carefully.
Understand that English isn't everyone's first language so be lenient of bad spelling and grammar.
If a question is poorly phrased then either ask for clarification, ignore it, or edit the question and fix the problem. Insults are not welcome.
Don't tell someone to read the manual. Chances are they have and don't get it. Provide an answer or move on to the next question.

Let's work to help developers, not make them feel stupid.

This content, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)