LAPACK : 압축 스토리지 매트릭스에 대한 작업이 더 빠릅니까?

Fortran과 LAPACK을 사용하여 실제 대칭 행렬을 tridiagonalize하고 싶습니다. LAPACK은 기본적으로 2 개의 루틴을 제공합니다. 하나는 전체 매트릭스에서, 다른 하나는 압축 된 스토리지의 매트릭스에서 작동합니다. 후자는 분명히 적은 메모리를 사용하지만, 속도 차이에 관해 언급할만한 것이 있는지 궁금합니다.LAPACK : 압축 스토리지 매트릭스에 대한 작업이 더 빠릅니까?

출처

2012-01-20 Michael Becker

나는 이것에 대해 전문가가 아니지만, 내 생각에 그 대답은 "의존 할 것"입니다. 주로 매트릭스의 구조 (희소성의 정도). – eriktous

이것은 경험적 질문입니다. 그러나 일반적으로 아무 것도 무료로 제공되지 않으며 메모리가 적어지고 런타임이 길어지는 것이 일반적입니다.

이 경우 데이터의 색인 생성은 압축 된 경우보다 복잡하므로 행렬을 탐색 할 때 데이터를 가져 오는 비용이 약간 더 높습니다. 이 그림을 복잡하게 만드는 것은 대칭 행렬의 경우 lapack 루틴이 특정 종류의 패킹을 가정하기 때문입니다 (사용 가능한 행렬의 상위 또는 하위 구성 요소 만 있음).

오늘은 고유 문제와 관련하여 혼란 스럽습니다. 측정 벤치 마크로 사용하겠습니다. 간단한 대칭 테스트 케이스 (http://people.sc.fsu.edu/~jburkardt/m_src/test_mat/test_mat.html에서 Herdon 매트릭스)와 함께 노력하고, 내가 인정해야하는, 약 18 %의 차이가있다 sspevd

$ ./eigen2 500 
Generating a Herdon matrix: 
Unpacked array: 
Eigenvalues L_infty err = 1.7881393E-06 
Packed array: 
Eigenvalues L_infty err = 3.0994415E-06 
Packed time: 2.800000086426735E-002 
Unpacked time: 2.500000037252903E-002 

$ ./eigen2 1000 
Generating a Herdon matrix: 
Unpacked array: 
Eigenvalues L_infty err = 4.5299530E-06 
Packed array: 
Eigenvalues L_infty err = 5.8412552E-06 
Packed time: 0.193900004029274  
Unpacked time: 0.165000006556511 

$ ./eigen2 2500 
Generating a Herdon matrix: 
Unpacked array: 
Eigenvalues L_infty err = 6.1988831E-06 
Packed array: 
Eigenvalues L_infty err = 8.4638596E-06 
Packed time: 3.21040010452271  
Unpacked time: 2.70149993896484

와 ssyevd을 비교하면 약간 큰과 (내가 기대했던 것보다 더 큰 포장 된 케이스의 오류?). 이것은 인텔의 MKL입니다. 성능 차이는 일반적으로 매트릭스에 따라 달라집니다. 물론, 에릭 포인트 (eriktous)가 지적한 바와 같이, 그리고 현재 수행중인 문제에 따라 다릅니다. 행렬에 대한 랜덤 액세스가 많을수록 오버 헤드가 더 심해집니다. 내가 사용한 코드는 다음과 같습니다 :

program eigens 
     implicit none 

     integer :: nargs,n ! problem size 
     real, dimension(:,:), allocatable :: A, B, Z 
     real, dimension(:), allocatable :: PA 
     real, dimension(:), allocatable :: work 
     integer, dimension(:), allocatable :: iwork 
     real, dimension(:), allocatable :: eigenvals, expected 
     real :: c, p 
     integer :: worksize, iworksize 
     character(len=100) :: nstr 
     integer :: unpackedclock, packedclock 
     double precision :: unpackedtime, packedtime 
     integer :: i,j,info 

! get filename 
     nargs = command_argument_count() 
     if (nargs /= 1) then 
      print *,'Usage: eigen2 n' 
      print *,'  Where n = size of array' 
      stop 
     endif 
     call get_command_argument(1, nstr) 
     read(nstr,'(I)') n 
     if (n < 4 .or. n > 25000) then 
      print *, 'Invalid n ', nstr 
      stop 
     endif 


! Initialize local arrays  

     allocate(A(n,n),B(n,n)) 
     allocate(eigenvals(n)) 

! calculate the matrix - unpacked 

     print *, 'Generating a Herdon matrix: ' 

     A = 0. 
     c = (1.*n * (1.*n + 1.) * (2.*n - 5.))/6. 
     forall (i=1:n-1,j=1:n-1) 
     A(i,j) = -1.*i*j/c 
     endforall 
     forall (i=1:n-1) 
     A(i,i) = (c - 1.*i*i)/c 
     A(i,n) = 1.*i/c 
     endforall 
     forall (j=1:n-1) 
     A(n,j) = 1.*j/c 
     endforall 
     A(n,n) = -1./c 
     B = A 

     ! expected eigenvalues 
     allocate(expected(n)) 
     p = 3. + sqrt((4. * n - 3.) * (n - 1.)*3./(n+1.)) 
     expected(1) = p/(n*(5.-2.*n)) 
     expected(2) = 6./(p*(n+1.)) 
     expected(3:n) = 1. 

     print *, 'Unpacked array:' 
     allocate(work(1),iwork(1)) 
     call ssyevd('N','U',n,A,n,eigenvals,work,-1,iwork,-1,info) 
     worksize = int(work(1)) 
     iworksize = int(work(1)) 
     deallocate(work,iwork) 
     allocate(work(worksize),iwork(iworksize)) 

     call tick(unpackedclock) 
     call ssyevd('N','U',n,A,n,eigenvals,work,worksize,iwork,iworksize,info) 
     unpackedtime = tock(unpackedclock) 
     deallocate(work,iwork) 

     if (info /= 0) then 
      print *, 'Error -- info = ', info 
     endif 
     print *,'Eigenvalues L_infty err = ', maxval(eigenvals-expected) 


     ! pack array 

     print *, 'Packed array:' 
     allocate(PA(n*(n+1)/2)) 
     allocate(Z(n,n)) 
     do i=1,n 
     do j=i,n 
      PA(i+(j-1)*j/2) = B(i,j) 
     enddo 
     enddo 

     allocate(work(1),iwork(1)) 
     call sspevd('N','U',n,PA,eigenvals,Z,n,work,-1,iwork,-1,info) 
     worksize = int(work(1)) 
     iworksize = iwork(1) 
     deallocate(work,iwork) 
     allocate(work(worksize),iwork(iworksize)) 

     call tick(packedclock) 
     call sspevd('N','U',n,PA,eigenvals,Z,n,work,worksize,iwork,iworksize,info) 
     packedtime = tock(packedclock) 
     deallocate(work,iwork) 
     deallocate(Z,A,B,PA) 

     if (info /= 0) then 
      print *, 'Error -- info = ', info 
     endif 
     print *,'Eigenvalues L_infty err = ', & 
     maxval(eigenvals-expected) 

     deallocate(eigenvals, expected) 


     print *,'Packed time: ', packedtime 
     print *,'Unpacked time: ', unpackedtime 


contains 
    subroutine tick(t) 
     integer, intent(OUT) :: t 

     call system_clock(t) 
    end subroutine tick 

    ! returns time in seconds from now to time described by t 
    real function tock(t) 
     integer, intent(in) :: t 
     integer :: now, clock_rate 

     call system_clock(now,clock_rate) 

     tock = real(now - t)/real(clock_rate) 
    end function tock 

end program eigens

출처

2012-01-20 18:56:46

LAPACK : 압축 스토리지 매트릭스에 대한 작업이 더 빠릅니까?

답변

관련 문제