omp Fortran 循环与数组操作的效率比较

问题描述 投票:0回答:0

我正在比较 Fortran 中使用 OpenMP 时,不同的循环索引组织方式(遍历第一个索引还是最后一个索引)以及是否手动按线程拆分索引范围,对数组复制效率的影响。

对于给定的代码

Program test
    ! Benchmark: times several ways of copying the square array a into c:
    !   * serial loops over the first / last index using array sections,
    !   * serial doubly nested loops in both nesting orders,
    !   * OpenMP work-shared loops over the first / last index,
    !   * OpenMP regions in which each thread copies its own contiguous
    !     slab of columns / rows ("split" approach).
    ! The thread count and the array extent na are read from standard input
    ! and the elapsed wall-clock time of each variant is printed.
    Use, Intrinsic :: iso_fortran_env, Only : li => int64
    Use omp_lib, Only : omp_set_num_threads, omp_get_thread_num, &
                        omp_get_num_threads

    Implicit None

    Integer, Parameter :: dp = selected_real_kind(15, 307)

    ! Number of repetitions of the first test. One pass, so that this test
    ! performs the same amount of work as all the other tests.
    Integer, Parameter :: m_iter = 1

    Real( dp ), Dimension( :, : ), Allocatable :: a
    Real( dp ), Dimension( :, : ), Allocatable :: c

    Integer :: na
    Integer :: i, j, m
    Integer :: nthr, nteam, nstart, nend
    Integer( li ) :: start, finish, rate
    Integer :: numthreads
    Real( dp ) :: sum_time1

    Write( *, * ) 'numthreads'
    Read( *, * ) numthreads
    If ( numthreads < 1 ) Error Stop 'numthreads must be at least 1'

    Call omp_set_num_threads( numthreads )

    Write( *, * ) 'na'
    Read( *, * ) na
    If ( na < 1 ) Error Stop 'na must be at least 1'

    Allocate( a ( 1:na, 1:na ) )
    Allocate( c ( 1:na, 1:na ) )

    Call Random_number( a )

    ! Touch c once before any timing so that the first timed section is not
    ! charged with the cost of first accessing freshly allocated memory.
    c = 0.0_dp

    ! --- serial, loop over the first index, section over the second ------
    Call System_clock( start, rate )
    Do m = 1, m_iter
      Do i = 1, na
          c(i,:) = a(i,:)
      End Do
    End Do
    Call System_clock( finish, rate )
    sum_time1 = elapsed_seconds( start, finish, rate )
    Write( *, * ) 'Time for loop first index', sum_time1

    ! --- serial, loop over the last index, section over the first --------
    Call System_clock( start, rate )
    Do i = 1, na
        c(:,i) = a(:,i)
    End Do
    Call System_clock( finish, rate )
    sum_time1 = elapsed_seconds( start, finish, rate )
    Write( *, * ) 'Time for loop last index', sum_time1

    ! --- serial, explicit double loop, inner loop over the second index --
    Call System_clock( start, rate )
    Do i = 1, na
      Do j = 1, na
        c(i,j) = a(i,j)
      End Do
    End Do
    Call System_clock( finish, rate )
    sum_time1 = elapsed_seconds( start, finish, rate )
    Write( *, * ) 'Time for loop two index first one', sum_time1

    ! --- serial, explicit double loop, inner loop over the first index ---
    Call System_clock( start, rate )
    Do i = 1, na
      Do j = 1, na
        c(j,i) = a(j,i)
      End Do
    End Do
    Call System_clock( finish, rate )
    sum_time1 = elapsed_seconds( start, finish, rate )
    Write( *, * ) 'Time for loop two index inner most', sum_time1

    ! --- OpenMP work-shared loop over the first index --------------------
    Call System_clock( start, rate )
    !$omp parallel do default(none) shared(a, c, na) private(i)
    Do i = 1, na
      c(i,:) = a(i,:)
    End Do
    !$omp end parallel do
    Call System_clock( finish, rate )
    sum_time1 = elapsed_seconds( start, finish, rate )

    Write( *, * ) 'Time for loop-omp first index', sum_time1

    ! --- OpenMP work-shared loop over the last index ---------------------
    Call System_clock( start, rate )
    !$omp parallel do default(none) shared(a, c, na) private(i)
    Do i = 1, na
      c(:,i) = a(:,i)
    End Do
    !$omp end parallel do
    Call System_clock( finish, rate )
    sum_time1 = elapsed_seconds( start, finish, rate )

    Write( *, * ) 'Time for loop-omp last index', sum_time1

    ! --- OpenMP, each thread copies its own slab of columns --------------
    Call System_clock( start, rate )
    !$omp parallel default(none) shared(a, c, na) &
    !$omp&         private(nthr, nteam, nstart, nend)
      nthr   = omp_get_thread_num()
      ! Partition by the size of the team actually running this region,
      ! which is not guaranteed to equal the requested numthreads.
      nteam  = omp_get_num_threads()
      nstart = nthr * na / nteam
      nend   = (nthr + 1) * na / nteam
      c(:, nstart+1:nend) = a(:, nstart+1:nend)
    !$omp end parallel
    Call System_clock( finish, rate )
    sum_time1 = elapsed_seconds( start, finish, rate )

    Write( *, * ) 'Time for omp split last index', sum_time1

    ! --- OpenMP, each thread copies its own slab of rows -----------------
    Call System_clock( start, rate )
    !$omp parallel default(none) shared(a, c, na) &
    !$omp&         private(nthr, nteam, nstart, nend)
      nthr   = omp_get_thread_num()
      nteam  = omp_get_num_threads()
      nstart = nthr * na / nteam
      nend   = (nthr + 1) * na / nteam
      c(nstart+1:nend, :) = a(nstart+1:nend, :)
    !$omp end parallel
    Call System_clock( finish, rate )
    sum_time1 = elapsed_seconds( start, finish, rate )

    Write( *, * ) 'Time for omp split first index', sum_time1

    Deallocate( a )
    Deallocate( c )

  Contains

    ! Convert a pair of System_clock counts into elapsed seconds.
    !   t0, t1 : clock counts taken before and after the timed section
    !   ticks  : clock counts per second, as returned by System_clock
    Pure Function elapsed_seconds( t0, t1, ticks ) Result( seconds )
      Integer( li ), Intent( In ) :: t0, t1, ticks
      Real( dp ) :: seconds
      seconds = Real( t1 - t0, dp ) / Real( ticks, dp )
    End Function elapsed_seconds

End Program test

使用 ifort 并加上 -qopenmp -O3 编译标志,我得到以下结果

  numthreads
8
 na
5000
 Time for loop first index  0.116499780000000
 Time for loop last index  3.983250000000000E-002
 Time for loop two index first one  8.187200000000000E-003
 Time for loop two index inner most  8.229439999999999E-003
 Time for loop-omp first index  3.069090000000000E-003
 Time for loop-omp last index  3.028410000000000E-003
 Time for omp split last index  1.025315000000000E-002
 Time for omp split first index  9.189559999999999E-003

我的问题是 (i) 为什么在循环中使用两个索引比例如

(:,i)

更快

(ii) 为什么手动“拆分”索引范围的方法,比在 do 循环上使用 omp(!$omp do)更慢?

fortran openmp
© www.soinside.com 2019 - 2024. All rights reserved.