저는 CUDA를 처음 사용했습니다. 나는 그것을 익숙하게하는 데 도움이되는 간단한 연습을하려고 노력했다. 나는 작은 프로그램 인 "Find Prime Numbers"를 코딩했다. 거의 끝났지 만 해결할 수없는 문제가 있습니다.CUDA 오류 메시지 : 잘못된 구성 인수
getPrimeKernel launch failed!!: invalid configuration argument
findPrimeWithCuda failed!!
내가 내 코드를 조정해야합니다 : 나는 1027보다 더 입력, 나는 오류 메시지가 표시됩니다 내가 찾을 수있는 최대 번호가 1027 인 것을 발견? 고맙습니다. 여기
내 코드입니다 :#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <vector>
using namespace std;
cudaError_t findPrimeWithCuda(bool *c, int *a, unsigned int size);
__host__ __device__ bool checkPrime(int i)
{
for (int m = 2; m <= i - 1; m++)
{
if (i%m == 0) return true;
}
return false;
}
__global__ void getPrimeKernel(bool *c, int *a)
{
int i = threadIdx.x;
c[i] = checkPrime(a[i]);
}
void cudaGetPrime(int i)
{
i = i - 3;
int *arr = (int *)malloc((size_t)(i * sizeof(int)));
bool *rst = (bool *)malloc((size_t)(i * sizeof(bool)));
for (int j = 0; j <= i; j++) arr[j] = j + 3;
cudaError_t cudaStatus = findPrimeWithCuda(rst, arr, i);
if (cudaStatus != cudaSuccess) fprintf(stderr,"findPrimeWithCuda failed!!");
}
void w_CudaArray(int lastNum)
{
time_t t1 = time(NULL);
cudaGetPrime(lastNum);
time_t t2 = time(NULL);
printf("Time to spent : %d second\n", t2 - t1);
cout << "Computing with CUDA to count the prime numbers ends!!" << endl << endl;
}
int main()
{
int lastNum = 0;
cout << "The final number which you want to find the prime numbers : ";
cin >> lastNum;
w_CudaArray(lastNum);
}
cudaError_t findPrimeWithCuda(bool *c, int *a, unsigned int size)
{
int *dev_a = 0;
bool *dev_c = false;
cudaError_t cudaStatus;
cudaStatus = cudaSetDevice(0);
if (cudaStatus != cudaSuccess)
{
fprintf(stderr, "cudaSetDevice failed!!");
goto Error;
}
size_t totalm, freem;
float free_m, total_m, used_m;
cudaMemGetInfo(&freem, &totalm);
free_m = (size_t)freem/1048576.0;
total_m = (size_t)totalm/1048576.0;
used_m = total_m - free_m;
cout << "Total memory = " << total_m << " MB" << endl;
cout << "Used memory = " << used_m << " MB" << endl;
cout << "Free memory = " << free_m << " MB" << endl;
cudaStatus = cudaMalloc((void**)&dev_a, size * sizeof(int));
if (cudaStatus != cudaSuccess)
{
fprintf(stderr, "cudaMalloc dev_a failed!!");
goto Error;
}
cudaStatus = cudaMalloc((void**)&dev_c, size * sizeof(bool));
if (cudaStatus != cudaSuccess)
{
fprintf(stderr, "cudaMalloc dev_c failed!!");
goto Error;
}
cudaStatus = cudaMemcpy(dev_a, a, size * sizeof(int), cudaMemcpyHostToDevice);
if (cudaStatus != cudaSuccess)
{
fprintf(stderr, "cudaMemcpy dev_a failed!!");
goto Error;
}
getPrimeKernel<<<1, size>>>(dev_c, dev_a);
cudaStatus = cudaGetLastError();
if (cudaStatus != cudaSuccess)
{
fprintf(stderr, "getPrimeKernel launch failed!!: %s\n", cudaGetErrorString(cudaStatus));
goto Error;
}
if (cudaStatus != cudaSuccess)
{
fprintf(stderr, "cudaDeviceSynchorinze returned error code %d after launching getPrimeKernel!\n", cudaStatus);
goto Error;
}
cudaStatus = cudaMemcpy(c, dev_c, size * sizeof(bool), cudaMemcpyDeviceToHost);
if (cudaStatus != cudaSuccess)
{
fprintf(stderr, "cudaMemory failed!");
goto Error;
}
int trueNumber = 0;
for (int i = 0; i < size; i++)
{
if (c[i] == false) trueNumber++;
}
cout << "There are " << trueNumber + 2 << " prime numbers!!" << endl;
cudaStatus = cudaDeviceReset();
if (cudaStatus != cudaSuccess)
{
fprintf(stderr, "cudaDeviceReset failed!!");
}
Error:
cudaFree(dev_c);
cudaFree(dev_a);
return cudaStatus;
}