I am trying to compile an opencl project where I expect an output buffer to be assigned through a cl_mem object but when clEnqueueReadBuffer executes the std::vector<color> items in the array aren't assigned
the source code for the host in c++ is the following:
cl_mem originalPixelsBuffer = clCreateBuffer(p1.context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(Color) * imageObj->SourceLength(), source, &p1.status);
CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to Create buffer 0");
cl_mem targetBuffer = clCreateBuffer(p1.context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, sizeof(Color) * imageObj->OutputLength(), target, &p1.status);
CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to Create buffer 1");
p1.status = clEnqueueWriteBuffer(p1.commandQueue, originalPixelsBuffer, CL_FALSE, 0, sizeof(Color) * imageObj->SourceLength(), source, 0, NULL, NULL);
CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to write buffer 0");
p1.status = clEnqueueWriteBuffer(p1.commandQueue, targetBuffer, CL_TRUE, 0, sizeof(Color) * imageObj->OutputLength(), target, 0, NULL, NULL);
CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to write buffer 1");
size_t globalWorkSize[2] = { imageObj->originalWidth * 4, imageObj->originalHeight * 4 };
size_t localWorkSize[2]{ 64,64 };
SetLocalWorkSize(IsDivisibleBy64(localWorkSize[0]), localWorkSize);
p1.status = clEnqueueNDRangeKernel(p1.commandQueue, Kernel, 1, NULL, globalWorkSize, IsDisibibleByLocalWorkSize(globalWorkSize, localWorkSize) ? localWorkSize : NULL, 0, NULL, NULL);
CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to clEnqueueDRangeKernel");
p1.status = clEnqueueReadBuffer(p1.commandQueue, targetBuffer, CL_TRUE, 0, sizeof(Color) * imageObj->OutputLength(), target, 0, NULL, NULL);
CheckErrorCode(p1.status, p1.program, p1.devices[0], "Failed to write buffer 1");
the kernel code:
__kernel void interp(__global struct Color* source,__global struct Color* target,uint64 width,uint64 height,uint64 ratio,uint64 limit, uint64 originalHeight)
{
__private fp32 wIndex = (int64)get_global_id(0);
__private fp32 hIndex = (int64)get_global_id(1);
if(((int64)wIndex)%ratio==MATCH && ((int64)hIndex)%ratio ==MATCH)
{
__private int64 Index = (wIndex/ratio) * (originalHeight/ratio) + (hIndex/ratio);
if(Index < limit)
{
__private int64 tIndex = wIndex * height + hIndex;
target[tIndex].R = source[Index].R;
target[tIndex].G = source[Index].G;
target[tIndex].B = source[Index].B;
target[tIndex].A = source[Index].A;
}
}
}```
What I have tried:
I tried using `
CL_MEM_USE_HOST_PTR
` and
CL_MEM_COPY_HOST_PTR
but CL_MEM_COPY_HOST_PTR takes too long to execute.