我正在处理的子例程在数组大小变大时效率很低,例如NN = 1000,KK = 200,MM =200。但是,我无法提出优化它的想法。
subroutine noise_wave(NN, KK, MM, id, phase_base, wave_base, wave)
implicit none
integer, intent(in) :: NN, KK, MM
integer, intent(in), dimension(MM, 3) :: id
complex*16, intent(in) :: phase_base(3)
real*8, intent(in), dimension(KK, NN*(NN+1)/2 ) :: wave_base ! upper triangle
complex*16, intent(out) :: wave(NN, NN)
integer :: i, j, k, p, n
integer :: x, y, z
complex*16 :: phase
n = 0
do j = 1, NN
do i = 1, j
n = n + 1
do k = 1, KK
do p = 1, MM
x = id(p, 1)
y = id(p, 2)
z = id(p, 3)
phase = (phase_base(1) ** x) * (phase_base(2) ** y) * (phase_base(3) ** z)
wave(i,j) = wave(i,j) + wave_base(k,n) * conjg(phase)
enddo
enddo
wave(j,i) = conjg(wave(i,j) )
enddo
enddo
end subroutine noise_wave
有人可以给我一些提示吗?
如果将循环嵌套更改为],则可能会获得可观的加速效果>
do p = 1, MM x = id(p, 1) y = id(p, 2) z = id(p, 3) phase = (phase_base(1) ** x) * (phase_base(2) ** y) * (phase_base(3) ** z) conjg_phase = conjg(phase) ! new variable, calculate here, use below n = 0 do j = 1, NN do i = 1, j n = n + 1 do k = 1, KK wave(i,j) = wave(i,j) + wave_base(k,n) * conjg_phase enddo enddo wave(j,i) = conjg(wave(i,j) ) enddo enddo
((如果我了解代码,它可能仍然是正确的!)。如果重复足够频繁,即使是像我从循环嵌套的底部移出的那些计算之类的小事也很费劲。而且,将这些值较少频繁地移入和移出高速缓存可能会受益于执行速度。