CUDA Thrust를 사용하여 최대 요소 값과 그 위치 찾기

최대(최소) 요소의 값뿐만 아니라 그 위치까지(res.val 및 res.pos) 얻으려면 어떻게 해야 합니까?

// Build 100 pseudo-random values on the host and copy them to the device.
thrust::host_vector<float> h_vec(100);
thrust::generate(h_vec.begin(), h_vec.end(), rand);
thrust::device_vector<float> d_vec = h_vec;

// Reduce with thrust::maximum to obtain the largest value. The accumulator
// type is float, matching the element type of the vectors. Seeding with -1
// is safe here because rand() never returns a negative number.
// Note: this reduction yields only the value, not the element's position.
float res = -1;
res = thrust::reduce(d_vec.begin(), d_vec.end(), res, thrust::maximum<float>());

출처

2011-10-10 rych

thrust::reduce를 사용하지 마십시오. 대신 thrust/extrema.h에 있는 thrust::max_element(thrust::min_element)를 사용하십시오(max_element에 빈 범위를 전달할 때의 주의 사항은 코드 아래 참고):

thrust::host_vector<float> h_vec(100); 
thrust::generate(h_vec.begin(), h_vec.end(), rand); 
thrust::device_vector<float> d_vec = h_vec; 

thrust::device_vector<float>::iterator iter = 
    thrust::max_element(d_vec.begin(), d_vec.end()); 

unsigned int position = iter - d_vec.begin(); 
float max_val = *iter; 

std::cout << "The maximum value is " << max_val << " at position " << position << std::endl;

주의: max_element에 빈 범위를 전달하면 결과를 안전하게 역참조할 수 없습니다.

출처

2011-10-10 07:24:54

Jared Hoberock이 이미 이 질문에 만족스럽게 답변했습니다. 아래에서는 배열이 device_vector 컨테이너가 아니라 cudaMalloc으로 할당되는 흔한 경우에 맞춰 약간 변경한 버전을 제시하려고 합니다.

아이디어는 cudaMalloc으로 할당한 원시 포인터를 device_ptr인 dev_ptr로 감싸고, min_element의 출력을 device_ptr인 min_ptr로 받은 다음(일반성을 잃지 않으므로 최댓값 대신 최솟값을 예로 듭니다), 최솟값은 min_ptr[0]으로, 위치는 &min_ptr[0] - &dev_ptr[0]으로 구하는 것입니다.

#include <cstdio>
#include <cstdlib>
#include <ctime>

#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>
#include <thrust/extrema.h>

/***********************/
/* CUDA ERROR CHECKING */
/***********************/
// Wraps a CUDA runtime call and forwards its status, together with the call
// site's file and line, to gpuAssert. The do { } while (0) wrapper makes the
// macro expand to a single statement, so `if (c) gpuErrchk(x); else ...`
// parses as intended. Invoke it with a trailing semicolon.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)

/**
 * Reports a failed CUDA runtime call on stderr.
 *
 * @param code  Status returned by the CUDA runtime call.
 * @param file  Source file of the call site (supplied by gpuErrchk).
 * @param line  Source line of the call site (supplied by gpuErrchk).
 * @param abort When true (the default), terminate the process with the
 *              error code as exit status after printing the message.
 */
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
    // Nothing to report when the call succeeded.
    if (code == cudaSuccess) return;

    fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort) exit(code);
}

/********/ 
/* MAIN */ 
/********/ 
int main() { 

    srand(time(NULL)); 

    const int N = 10; 

    float *h_vec = (float *)malloc(N * sizeof(float)); 
    for (int i=0; i<N; i++) { 
     h_vec[i] = rand()/(float)(RAND_MAX); 
     printf("h_vec[%i] = %f\n", i, h_vec[i]); 
    } 

    float *d_vec; gpuErrchk(cudaMalloc((void**)&d_vec, N * sizeof(float))); 
    gpuErrchk(cudaMemcpy(d_vec, h_vec, N * sizeof(float), cudaMemcpyHostToDevice)); 

    thrust::device_ptr<float> dev_ptr = thrust::device_pointer_cast(d_vec); 

    thrust::device_ptr<float> min_ptr = thrust::min_element(dev_ptr, dev_ptr + N); 

    float min_value = min_ptr[0]; 
    printf("\nMininum value = %f\n", min_value); 
    printf("Position = %i\n", &min_ptr[0] - &dev_ptr[0]); 

}

출처

2015-02-18 22:27:38 JackOLantern

CUDA Thrust를 사용하여 최대 요소 값과 그 위치 찾기

답변

관련 문제