-1
저는 CUDA 함수를 사용하는 DLL을 만들려고 필사적으로 노력하고 있지만 제대로 작동하지 않습니다 (CUDA를 참조하는 DLL을 컴파일할 수 없습니다).
"Creating DLL from CUDA using nvcc"에 설명된 방법을 시도해 보았습니다. 컴파일은 시작되지만 nvcc에서 다음과 같은 경고와 오류가 발생합니다:
warning: __declspec attributes ignored
At line:1 char:1
+ nvcc -o ...
+ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ CategoryInfo : NotSpecified: (...ributes ignored:String) [], RemoteException
+ FullyQualifiedErrorId : NativeCommandError
...\kernel.cu(81): warning: __declspec attributes ignored
...\cudaFFT.h(21): warning: __declspec attributes ignored
.../kernel.cu(81): warning: __declspec attributes ignored
nvcc warning : The 'compute_20', 'sm_20', and 'sm_21' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning).
kernel.cu
Création de la bibliothèque C:/Users/alombet/Documents/Visual Studio 2015/Projects/Test/kernel.lib et de l'objet C:/Users/alombet/Documents/Visual Studio 2015/Projects/Test/kernel.exp
tmpxft_00003b9c_00000000-30_kernel.obj : error LNK2019: symbole externe non résolu cufftPlan1d référencé dans la fonction AllocateMemoryForFFTs
tmpxft_00003b9c_00000000-30_kernel.obj : error LNK2019: symbole externe non résolu cufftExecD2Z référencé dans la fonction ComputeFFT
tmpxft_00003b9c_00000000-30_kernel.obj : error LNK2019: symbole externe non résolu cufftDestroy référencé dans la fonction DeAllocateMemoryForFFTs
C:/Users/alombet/Documents/Visual Studio 2015/Projects/Test/kernel.dll : fatal error LNK1120: 3 externes non résolus
우선 __declspec이 무시되는 것으로 보이고, 그다음에는 컴파일러가 제가 CUDA 라이브러리에서 사용하는 함수들을 찾지 못하는 것 같습니다. 저는 손으로 직접 컴파일하는 데 정말 익숙하지 않습니다. 평소에는 IDE를 사용하기 때문에 여기서는 완전히 막막합니다. 제 문제를 찾았습니다.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include <iostream>
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
// includes, project
#include <cuda_runtime.h>
#include <cufft.h>
#include <cufftXt.h>
// LIBRARY_EXPORTS is defined unconditionally here, so within this file
// LIBRARY_API always expands to __declspec(dllexport).
#define LIBRARY_EXPORTS 1
#ifdef LIBRARY_EXPORTS
#define LIBRARY_API __declspec(dllexport)
#else
// Not taken in this file, because LIBRARY_EXPORTS is defined just above.
#define LIBRARY_API __declspec(dllimport)
#endif
#include "cudaFFT.h"
#ifdef __cplusplus
extern "C" {
#endif
// Reports how many CUDA devices the runtime can see.
//
// Returns the count written by cudaGetDeviceCount, or 0 when that call
// reports an error, so the caller never receives an uninitialized value.
int LIBRARY_API __cdecl numberOfGpus()
{
    int nDevices = 0;
    if (cudaGetDeviceCount(&nDevices) != cudaSuccess)
        return 0;
    return nDevices;
}
// File-scope FFT state shared by the exported functions below.
// The pointers have static storage duration and therefore start out null;
// the initializers only make that explicit.

// Host-side buffers (allocated with cudaMallocHost in AllocateMemoryForFFTs).
cufftDoubleReal *host_input = NULL;
cufftDoubleComplex *host_output = NULL;

// Device-side buffers (allocated with cudaMalloc in AllocateMemoryForFFTs).
cufftDoubleReal *device_input = NULL;
cufftDoubleComplex *device_output = NULL;

// cuFFT plan handle created by cufftPlan1d in AllocateMemoryForFFTs.
cufftHandle plan;
// Allocates the host/device buffers and the cuFFT plan used by ComputeFFT.
//
// Parameters:
//   maxSize  - transform length passed to cufftPlan1d (elements per signal).
//   maxBatch - batch count passed to cufftPlan1d (number of signals).
//
// Returns cudaSuccess when everything was created. Returns
// cudaErrorInvalidValue for non-positive arguments, the failing cudaError
// for an allocation failure, or the cufftResult cast to cudaError when plan
// creation fails (the cast is kept so existing callers see the same codes).
//
// On any failure every buffer allocated by this call is released again and
// the global state is left untouched. Note that calling this function a
// second time without DeAllocateMemoryForFFTs in between still overwrites
// (and thereby leaks) the previously stored buffers and plan.
cudaError LIBRARY_API __cdecl AllocateMemoryForFFTs(int maxSize, int maxBatch)
{
    // Negative values would wrap around in the size_t arithmetic below.
    if (maxSize <= 0 || maxBatch <= 0)
        return cudaErrorInvalidValue;

    const size_t realBytes =
        sizeof(cufftDoubleReal) * (size_t)maxSize * (size_t)maxBatch;
    // A D2Z transform of length N produces N/2 + 1 complex values per signal.
    const size_t complexBytes =
        sizeof(cufftDoubleComplex) * ((size_t)maxSize / 2 + 1) * (size_t)maxBatch;

    // Work on locals so a partial failure never leaves the globals half set.
    cufftDoubleReal *hostIn = NULL;
    cufftDoubleComplex *hostOut = NULL;
    cufftDoubleReal *devIn = NULL;
    cufftDoubleComplex *devOut = NULL;
    cufftHandle newPlan = 0;

    cudaError err = cudaMallocHost((void **)&hostIn, realBytes);
    if (err == cudaSuccess)
        err = cudaMallocHost((void **)&hostOut, complexBytes);
    if (err == cudaSuccess)
        err = cudaMalloc((void **)&devIn, realBytes);
    if (err == cudaSuccess)
        err = cudaMalloc((void **)&devOut, complexBytes);
    if (err == cudaSuccess) {
        // Plan creation is the last step, so there is never a plan to
        // destroy on the failure path below.
        cufftResult res = cufftPlan1d(&newPlan, maxSize, CUFFT_D2Z, maxBatch);
        if (res != CUFFT_SUCCESS)
            err = (cudaError)res;
    }

    if (err != cudaSuccess) {
        // Undo whatever succeeded before the failure.
        if (devOut)
            cudaFree(devOut);
        if (devIn)
            cudaFree(devIn);
        if (hostOut)
            cudaFreeHost(hostOut);
        if (hostIn)
            cudaFreeHost(hostIn);
        return err;
    }

    host_input = hostIn;
    host_output = hostOut;
    device_input = devIn;
    device_output = devOut;
    plan = newPlan;
    return cudaSuccess;
}
// Returns the host-side input buffer stored in host_input, which the caller
// fills before invoking ComputeFFT. The value is whatever host_input holds
// at the time of the call.
//
// LIBRARY_API is placed before the return type on purpose: MSVC ignores a
// __declspec that follows '*' in a declaration ("__declspec attributes
// ignored"), which would leave this function out of the DLL export table.
// NOTE(review): if cudaFFT.h declares this function with the attribute after
// 'double*', apply the same reordering there.
LIBRARY_API double* __cdecl GetInputDataPointer()
{
    return host_input;
}
// Copies the host input buffer to the device, executes the D2Z plan, and
// copies the complex output back to the host.
//
// Parameters:
//   size   - number of real samples per signal to copy in.
//   batch  - number of signals to copy.
//   result - out-parameter; on success receives host_output.
//
// Returns cudaSuccess on success, cudaErrorInvalidValue for a NULL result
// pointer or non-positive size/batch, the failing cudaError from a copy, or
// the cufftResult cast to cudaError when the transform fails. *result is
// only written on success.
//
// size and batch only determine how many bytes are copied; the transform
// itself uses the geometry the plan was created with.
// NOTE(review): callers presumably must pass the same values that were given
// to AllocateMemoryForFFTs -- confirm against the callers.
cudaError LIBRARY_API __cdecl ComputeFFT(int size, int batch, double2** result)
{
    // Reject arguments that would crash on the final store or wrap around
    // in the size_t byte-count arithmetic.
    if (result == NULL || size <= 0 || batch <= 0)
        return cudaErrorInvalidValue;

    const size_t inputBytes =
        sizeof(cufftDoubleReal) * (size_t)size * (size_t)batch;
    const size_t outputBytes =
        sizeof(cufftDoubleComplex) * ((size_t)size / 2 + 1) * (size_t)batch;

    cudaError err = cudaMemcpy(device_input, host_input, inputBytes,
                               cudaMemcpyHostToDevice);
    if (err != cudaSuccess)
        return err;

    cufftResult res = cufftExecD2Z(plan, device_input, device_output);
    if (res != CUFFT_SUCCESS)
        return (cudaError)res;  // cast kept for existing callers

    err = cudaMemcpy(host_output, device_output, outputBytes,
                     cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
        return err;

    *result = host_output;
    return cudaSuccess;
}
// Destroys the cuFFT plan and releases the host and device buffers.
//
// Each global pointer is reset to NULL after it is freed, so a repeated call
// does not free the same memory twice and GetInputDataPointer no longer
// returns a freed buffer. The plan handle is left as is, because no
// "invalid handle" sentinel is visible in this file.
void LIBRARY_API __cdecl DeAllocateMemoryForFFTs()
{
    cufftDestroy(plan);

    cudaFree(device_input);
    device_input = NULL;
    cudaFree(device_output);
    device_output = NULL;

    cudaFreeHost(host_input);
    host_input = NULL;
    cudaFreeHost(host_output);
    host_output = NULL;
}
#ifdef __cplusplus
}
#endif