OpenCL demo code

kernel 源码保存为纯文本文件即可(下面的主机代码从 opencldemo.cl 读取它):

/*
 * Element-wise vector addition.
 *
 * Each work-item handles the single element selected by its global id in
 * dimension 0: result[id] = a[id] + b[id].
 *
 * a, b    read-only input buffers of float
 * result  output buffer of float
 *
 * There is no bounds check, so the global work size must not exceed the
 * number of elements in any of the three buffers.
 */
__kernel void MyAddTest(__global const float *a,
                        __global const float *b,
                        __global float *result)
{
    const int gid = get_global_id(0);
    const float lhs = a[gid];
    const float rhs = b[gid];

    result[gid] = lhs + rhs;
}

调用 TestOpenCLAddDemo

#include <CL/opencl.h>

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

int TestOpenCLAddDemo() {
    const int num_len = 100;        // 测试数据长
    cl_int status = 0;				// 函数返回状态
    cl_uint platforms_num = 0;		// 平台个数
    cl_uint devices_num = 0;		// 设备数量
    //1.get platform num
    status = clGetPlatformIDs(0, NULL, &platforms_num);
    if (CL_SUCCESS != status)
    {
        printf("clGetPlatformIDs error\n");
        return -1;
    }
    // 获得平台地址
    cl_platform_id	platform = NULL;
    if (platforms_num > 0)  // 如果有可用平台
    {
        cl_platform_id *pPlatforms = (cl_platform_id *)malloc(platforms_num * sizeof(cl_platform_id));
        status = clGetPlatformIDs(platforms_num, pPlatforms, NULL);
        platform = pPlatforms[0];	
        free(pPlatforms);			
    }
    size_t name_size = 0;			
    status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &name_size);
    char *platform_name = (char *)alloca(name_size * sizeof(char));
    // get name info
    status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, name_size, platform_name, NULL);
    printf("Platform name:%s\n", platform_name);
    //2.get GPU devices
    cl_device_id *devices = NULL;				
    status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &devices_num);

    if (0 == devices_num)	// GPU numbers==0
    {
        printf("Using CPU\n");
        status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &devices_num);
        devices = (cl_device_id *)malloc(devices_num * sizeof(cl_device_id));
        status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, devices_num, devices, NULL);
    }
    else
    {
        printf("Using GPU\n");
        devices = (cl_device_id *)malloc(devices_num * sizeof(cl_device_id));
        status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, devices_num, devices, NULL);
    }

    // 3.create env
    cl_context context = clCreateContext(NULL, 1, devices, NULL, NULL, NULL);
    if (context == NULL)
    {
        printf("clCreateContext error\n");
        return -1;
    }

    // 4.create clCreateCommandQueue
    // 创建第1个设备的命令队列
    cl_command_queue command_queue = clCreateCommandQueue(context, devices[0], 0, NULL);
    if (command_queue == NULL)
    {
        printf("clCreateCommandQueue error\n");
        return -1;
    }
    // 5.create cl
    string filename = "D:/vs2019/cudatest/cutest/src/opencldemo.cl";
    string cl_str;	
    const char	*cl_str_ptr;
    status = GetCLKernel(filename.c_str(), cl_str);
    cl_str_ptr = cl_str.c_str();
    size_t	cl_str_size[] = { 0 };			
    cl_str_size[0] = strlen(cl_str_ptr);
    // 创建程序对象
    cl_program program = clCreateProgramWithSource(context, 1, &cl_str_ptr, cl_str_size, NULL);
    if (program == NULL)
    {
        printf("clCreateProgramWithSource error\n");
        return -1;
    }
    // 6.build cl
    // 编译程序
    status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
    if (CL_SUCCESS != status)	// 编译错误
    {
        printf("clBuildProgram error\n");
        char szBuildLog[16384];
        clGetProgramBuildInfo(program, *devices, CL_PROGRAM_BUILD_LOG, sizeof(szBuildLog), szBuildLog, NULL);
        printf("Error in Kernel:%s\n", szBuildLog);
        clReleaseProgram(program);
        return -1;
    }

    //7. create device memery
    int *ina = new int[num_len];
    int *inb = new int[num_len];
    for (int i = 0; i < num_len; ++i) {
        ina[i] = i;
        inb[i] = num_len - i;
    }
    cl_mem data_devicea = NULL;
    cl_mem data_deviceb = NULL;
    cl_mem data_device_res = NULL;
    data_devicea = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,(num_len) * sizeof(int),(void *)ina,NULL);
    data_deviceb = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,(num_len) * sizeof(int),(void *)inb,NULL);
    data_device_res = clCreateBuffer(context,CL_MEM_WRITE_ONLY,(num_len) * sizeof(int),NULL,NULL);
    if ((NULL == data_devicea) || (NULL == data_deviceb) || (NULL == data_device_res))
    {
        printf("clCreateBuffer:create error\n");
        return -1;
    }
    //8.create kernel
    cl_kernel kernel = clCreateKernel(program,"MyAddTest",NULL);
    if (NULL == kernel)
    {
        printf("clCreateKernel error\n");
        return -1;
    }

    //9.set kernel params
    status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&data_devicea);
    status |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&data_deviceb);
    status |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&data_device_res);
    if (CL_SUCCESS != status)
    {
        printf("clSetKernelArg error\n");
    }

    //10.run
    size_t	get_global_size[1] = { 0 };		// 用于设定内核分布	
    get_global_size[0] = num_len;  // 输入数据长
    // 利用命令队列使将再设备上执行的内核排队
    status = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, get_global_size, NULL, 0, NULL, NULL);
    if (CL_SUCCESS != status)
    {
        printf("clEnqueueNDRangeKernel:run kernel error\n");
        return -1;
    }

    // 11.get results from device
    int *res = NULL;
    res = new int[num_len];  // uiStrlength 为 输入字符串长度
    status = clEnqueueReadBuffer(command_queue, data_device_res, CL_TRUE, 0, num_len * sizeof(int), res, 0, NULL, NULL);
    if (CL_SUCCESS != status)
    {
        printf("clEnqueueReadBuffer:read results error\n");
        return -1;
    }
    // 12.print results
    int print_num = 10;
    cout << "input" << endl;
    for (int i = 0; i < print_num; ++i) {
        printf("%d ", ina[i]);
    }
    printf("\ninput2\n");
    for (int i = 0; i < print_num; ++i) {
        printf("%d ", inb[i]);
    }
    printf("\noutput\n");
    for (int i = 0; i < print_num; ++i) {
        printf("%d ", res[i]);
    }
    printf("\n");
    // -------------------------------13.释放资源-------------------------------- 
    status = clReleaseKernel(kernel);
    status = clReleaseProgram(program);
    status = clReleaseMemObject(data_devicea);
    status = clReleaseMemObject(data_deviceb);
    status = clReleaseMemObject(data_device_res);
    status = clReleaseCommandQueue(command_queue);
    status = clReleaseContext(context);
    delete[] ina;
    delete[] inb;
    delete[] res;
    free(devices);
    free(platform_name);
    printf("done\n");
    return 0;
}
/**
 * Reads an entire file (opened in binary mode) into a string.
 *
 * @param file_name  path of the file to read
 * @param res        receives the complete file contents on success, including
 *                   any embedded NUL bytes; left unmodified on failure
 * @return 0 on success; -1 if file_name is NULL or the file cannot be opened,
 *         sized, or read in full
 */
cl_int GetCLKernel(const char *file_name, std::string &res)
{
    if (file_name == NULL)
    {
        std::cout << "Error: Failed to open cl file\n:" << "(null)" << std::endl;
        return -1;
    }

    std::ifstream file(file_name, std::ios::in | std::ios::binary);
    if (!file.is_open())
    {
        std::cout << "Error: Failed to open cl file\n:" << file_name << std::endl;
        return -1;
    }

    // Determine the file size; tellg() yields -1 on failure, which must not be
    // converted to an (enormous) unsigned size.
    file.seekg(0, std::ios::end);
    const std::streamoff end_pos = std::streamoff(file.tellg());
    if (end_pos < 0)
    {
        std::cout << "Error: Failed to read cl file\n:" << file_name << std::endl;
        return -1;
    }
    file.seekg(0, std::ios::beg);

    // Read straight into a string so no manual buffer management is needed.
    std::string contents(static_cast<size_t>(end_pos), '\0');
    if (!contents.empty())
    {
        file.read(&contents[0], static_cast<std::streamsize>(contents.size()));
        if (file.gcount() != static_cast<std::streamsize>(contents.size()))
        {
            std::cout << "Error: Failed to read cl file\n:" << file_name << std::endl;
            return -1;
        }
    }

    res.swap(contents);
    return 0;
}

以下是一个简单的 OpenCL 使用示例,它执行向量加法: ```c #include <stdio.h> #include <stdlib.h> #include <CL/cl.h> #define LENGTH 1024 const char *kernelSource = "__kernel void vectorAdd(__global float *a, __global float *b, __global float *c) {\n" " int i = get_global_id(0);\n" " c[i] = a[i] + b[i];\n" "}\n"; int main() { cl_platform_id platform; cl_device_id device; cl_context context; cl_command_queue queue; cl_program program; cl_kernel kernel; cl_mem a, b, c; cl_int err; int i; float *A, *B, *C; // 初始化 A, B, C 数组 A = (float*) malloc(sizeof(float) * LENGTH); B = (float*) malloc(sizeof(float) * LENGTH); C = (float*) malloc(sizeof(float) * LENGTH); for (i = 0; i < LENGTH; i++) { A[i] = (float) i; B[i] = (float) (LENGTH - i); C[i] = 0.0f; } // 获取平台和设备 err = clGetPlatformIDs(1, &platform, NULL); err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); // 创建 OpenCL 上下文和命令队列 context = clCreateContext(NULL, 1, &device, NULL, NULL, &err); queue = clCreateCommandQueue(context, device, 0, &err); // 创建和编译内核程序 program = clCreateProgramWithSource(context, 1, &kernelSource, NULL, &err); err = clBuildProgram(program, 1, &device, NULL, NULL, NULL); // 创建内核 kernel = clCreateKernel(program, "vectorAdd", &err); // 创建和设置缓冲区 a = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * LENGTH, NULL, &err); b = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * LENGTH, NULL, &err); c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * LENGTH, NULL, &err); err = clEnqueueWriteBuffer(queue, a, CL_TRUE, 0, sizeof(float) * LENGTH, A, 0, NULL, NULL); err = clEnqueueWriteBuffer(queue, b, CL_TRUE, 0, sizeof(float) * LENGTH, B, 0, NULL, NULL); // 设定内核参数 err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &a); err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &b); err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &c); // 执行内核 size_t globalSize = LENGTH; size_t localSize = 64; err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize, &localSize, 0, NULL, NULL); // 读取结果 err = 
clEnqueueReadBuffer(queue, c, CL_TRUE, 0, sizeof(float) * LENGTH, C, 0, NULL, NULL); // 打印结果 for (i = 0; i < LENGTH; i++) { printf("%f + %f = %f\n", A[i], B[i], C[i]); } // 释放内存和资源 free(A); free(B); free(C); clReleaseMemObject(a); clReleaseMemObject(b); clReleaseMemObject(c); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(queue); clReleaseContext(context); return 0; } ``` 这个程序的作用是将两个向量相加,并打印结果。程序首先初始化了三个数组 A, B, C,然后获取 OpenCL 平台和设备,创建上下文和命令队列,创建和编译内核程序,创建内核,创建和设置缓冲区,设定内核参数,执行内核,读取结果,打印结果,最后释放内存和资源。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值