하위 버퍼(sub-buffer)를 `constant` 인수로 받는 OpenCL (1.2) 커널이 있습니다. 이 커널을 실행하면 하위 버퍼 대신 부모 버퍼가 사용되는 것처럼 보입니다. `global const` 인수를 사용하면 예상대로 작동합니다.
OpenCL이 constant 매개변수에 하위 버퍼 대신 그 부모 버퍼를 사용합니다.
서로 다른 컴퓨터의 Intel (Linux, beignet git) 구현과 nVidia (Linux, 367.44-3) 구현 모두에서 재현되지 않았다면 이 문제를 드라이버 버그로 치부했을 것입니다. 따라서 제가 어딘가에서 실수를 했을 가능성이 높습니다.
다음은 동작하는 예제입니다. 예상되는 출력은 `1, 1025, 1, 1025,` 이지만 실제로는 `1, 1, 1, 1025,` 가 출력됩니다.
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <CL/cl.h>
/* Element count of a true array; must not be used on a pointer. */
#define NELEMS(x) (sizeof(x) / sizeof((x)[0]))

/* Index into the platform list returned by clGetPlatformIDs(). */
#define PLATFORM 0
/* Index into the device list returned by clGetDeviceIDs(). */
#define DEVICE 0
/*
 * OpenCL C source for the two test kernels.  Both print element 1 of their
 * argument from the work-item whose global id is 1; they differ only in the
 * address space of the argument (constant vs. global const).
 */
const char src[] =
    "kernel void test1(constant int * const a) {\n"
    " size_t i = get_global_id(0);\n"
    " if (i == 1)\n"
    " printf(\"%i, \", a[i]);\n"
    "}\n"
    "\n"
    "kernel void test2(global const int * const a) {\n"
    " size_t i = get_global_id(0);\n"
    " if (i == 1)\n"
    " printf(\"%i, \", a[i]);\n"
    "}\n";

/*
 * Length of the kernel source in characters, excluding the terminating NUL.
 * This is the value the `lengths` argument of clCreateProgramWithSource()
 * expects; sizeof(src) alone would count the terminator as source text.
 */
const size_t src_len = sizeof(src) - 1;

/* Names of the kernels in `src`, in the order they are run. */
const char * const kernels[] = {"test1", "test2"};
/*
 * Sub-buffer test driver.
 *
 * Creates one parent buffer filled with 0, 1, 2, ..., splits it into two
 * equal sub-buffers, and runs every kernel named in `kernels` once per
 * sub-buffer, so each kernel prints element 1 of the argument it received.
 *
 * Every OpenCL call and allocation is checked with assert(), so the program
 * aborts at the first failure (unless built with NDEBUG).  Returns 0 when
 * everything succeeded.
 */
int main(void) {
    cl_int err = -1;

    /* --- Platform and device selection --------------------------------- */
    cl_uint num_platforms = 0;
    err = clGetPlatformIDs(0, NULL, &num_platforms);
    assert(err == CL_SUCCESS);
    assert(num_platforms > PLATFORM);
    cl_platform_id *platforms = malloc(sizeof(*platforms) * num_platforms);
    assert(platforms != NULL);
    err = clGetPlatformIDs(num_platforms, platforms, NULL);
    assert(err == CL_SUCCESS);

    cl_uint num_devices = 0;
    err = clGetDeviceIDs(platforms[PLATFORM], CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices);
    assert(err == CL_SUCCESS);
    /* DEVICE is an index, so it must be strictly below the device count. */
    assert(num_devices > DEVICE);
    cl_device_id *devices = malloc(sizeof(*devices) * num_devices);
    assert(devices != NULL);
    err = clGetDeviceIDs(platforms[PLATFORM], CL_DEVICE_TYPE_ALL, num_devices, devices, NULL);
    assert(err == CL_SUCCESS);

    /* --- Context and command queue ------------------------------------- */
    cl_context_properties context_properties[] = {
        CL_CONTEXT_PLATFORM, (cl_context_properties) platforms[PLATFORM], 0
    };
    cl_context context = clCreateContext(context_properties, 1, &devices[DEVICE], NULL, NULL, &err);
    assert(err == CL_SUCCESS);

    /* clCreateCommandQueue() is deprecated in newer headers; silence the warning. */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
    cl_command_queue queue = clCreateCommandQueue(context, devices[DEVICE], 0, &err);
#pragma GCC diagnostic pop
    assert(err == CL_SUCCESS);

    /* --- Program ------------------------------------------------------- */
    /*
     * clCreateProgramWithSource() wants an array of string pointers, so a
     * real pointer variable is required: &src would be a pointer to the
     * array itself, not a pointer to a pointer.  Passing NULL for the
     * lengths tells OpenCL that the string is NUL-terminated.
     */
    const char *source = src;
    cl_program program = clCreateProgramWithSource(context, 1, &source, NULL, &err);
    assert(err == CL_SUCCESS);
    err = clBuildProgram(program, 1, &devices[DEVICE], "", NULL, NULL);
    assert(err == CL_SUCCESS);

    /* --- Buffer geometry and device limits ----------------------------- */
    size_t buffer_size = 8192;
    size_t subbuffer_size = buffer_size / 2;
    {
        cl_uint align_bits;
        err = clGetDeviceInfo(devices[DEVICE], CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(align_bits), &align_bits, NULL);
        assert(err == CL_SUCCESS);
        /*
         * CL_DEVICE_MEM_BASE_ADDR_ALIGN is reported in bits, while the
         * sub-buffer origin is a byte offset, so convert before comparing.
         */
        size_t align_bytes = align_bits / 8;
        assert(align_bytes > 0);
        assert(subbuffer_size % align_bytes == 0);

        cl_ulong constbuf_size;
        err = clGetDeviceInfo(devices[DEVICE], CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(constbuf_size), &constbuf_size, NULL);
        assert(err == CL_SUCCESS);
        assert(constbuf_size > subbuffer_size);
    }

    /* --- Parent buffer and its two sub-buffers ------------------------- */
    cl_mem buffer = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_HOST_WRITE_ONLY, buffer_size, NULL, &err);
    assert(err == CL_SUCCESS);

    cl_mem sub_buffers[2];
    for (size_t i = 0; i < NELEMS(sub_buffers); i++) {
        cl_buffer_region region = {
            .origin = i * subbuffer_size,
            .size = subbuffer_size,
        };
        sub_buffers[i] = clCreateSubBuffer(buffer, 0, CL_BUFFER_CREATE_TYPE_REGION, &region, &err);
        assert(err == CL_SUCCESS);
    }

    /* --- Fill the parent buffer with its own element indices ----------- */
    {
        cl_int *data = clEnqueueMapBuffer(queue, buffer, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, 0, buffer_size, 0, NULL, NULL, &err);
        assert(err == CL_SUCCESS);
        for (size_t i = 0; i < buffer_size / sizeof(cl_int); i++) {
            data[i] = (cl_int) i;
        }
        cl_event unmap_event;
        err = clEnqueueUnmapMemObject(queue, buffer, data, 0, NULL, &unmap_event);
        assert(err == CL_SUCCESS);
        err = clWaitForEvents(1, &unmap_event);
        assert(err == CL_SUCCESS);
        /* The event was only needed for the wait; drop our reference. */
        clReleaseEvent(unmap_event);
    }

    /* --- Run every kernel on every sub-buffer -------------------------- */
    for (size_t k = 0; k < NELEMS(kernels); k++) {
        cl_kernel kernel = clCreateKernel(program, kernels[k], &err);
        assert(err == CL_SUCCESS);

        for (size_t i = 0; i < NELEMS(sub_buffers); i++) {
            err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &sub_buffers[i]);
            assert(err == CL_SUCCESS);

            /* One work-item per cl_int in the sub-buffer. */
            size_t work_size[] = {subbuffer_size / sizeof(cl_int)};
            cl_event run_event;
            err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, work_size, NULL, 0, NULL, &run_event);
            assert(err == CL_SUCCESS);
            err = clWaitForEvents(1, &run_event);
            assert(err == CL_SUCCESS);
            /* A new event is created on every launch; release each one. */
            clReleaseEvent(run_event);

            err = clFinish(queue);
            assert(err == CL_SUCCESS);
        }
        clReleaseKernel(kernel);
    }
    /* Terminate the line of comma-separated values printed by the kernels. */
    puts("");

    /* --- Cleanup -------------------------------------------------------- */
    for (size_t i = 0; i < NELEMS(sub_buffers); i++) {
        clReleaseMemObject(sub_buffers[i]);
    }
    clReleaseMemObject(buffer);
    clReleaseProgram(program);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);
    free(devices);
    free(platforms);
    return 0;
}
의견을 보내 주셔서 감사 드리며, 공급 업체에 연락하여 자세한 정보가 있는지 확인하겠습니다. – kai