我有简单的 MPI fortran 代码,如下所示。代码崩溃并出现错误 forrtl:严重(174):SIGSEGV,发生分段错误。我不确定错误在哪里。我在这里注意到的奇怪的事情是它并不总是崩溃。有时对于小n它有效有时无效。对于某些处理器来说,它适用于某些处理器。对于给定的示例,它不适用于任何数量的处理器。 MPI 中的调试对我来说并不那么容易。有人能发现这里出了什么问题吗?
program crash
use mpi
implicit none
integer,parameter::dp=kind(1.d0)
integer, parameter :: M =1500,N=M,O=M! Matrix dimension
integer myrank, numprocs,ierr, root
integer i, j, k,l,p,local_n,sendcounts
real(dp) R1(M),RHS(M),RHS1(M)
real(dp),dimension(:),allocatable::local_A
real(dp),dimension(:,:),allocatable::local_c,local_c1
real(dp) summ,B(N,O),B1(N,O),C(M*O),C1(M)
real(dp) final_product(M,O),rhs_product(O)
integer,dimension(:),allocatable::displs!,displs1,displs2
integer,dimension(:),allocatable::sendcounts_list
real(dp),dimension(:,:),allocatable::local_A_Matrix
integer status(MPI_STATUS_SIZE)
integer request
! Initialize MPI
call MPI_Init(ierr)
call MPI_Comm_size(MPI_COMM_WORLD, numprocs, ierr)
call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr)
B=0.d0
do i=1,N
do j=1,O
B(i,j)=(i+1)*myrank+j*myrank+j*i
enddo
enddo
R1=0.d0
do i=1,N
R1(i)=i*myrank+1
enddo
if (myrank<numprocs-mod(M,numprocs)) then
local_n=M/numprocs
else
local_n=M/numprocs+1
endif
sendcounts = local_n * N
allocate(sendcounts_list(numprocs))
call MPI_AllGATHER(local_n, 1, MPI_INT, sendcounts_list, 1, MPI_INT,MPI_COMM_WORLD,IERR)
if(myrank==0) then
allocate(displs(numprocs))
displs=0
do i=2,numprocs
displs(i) = displs(i-1)+N*sendcounts_list(i-1)
enddo
endif
allocate(local_A(sendcounts))
local_A=0.d0
call MPI_Scatterv(Transpose(B),N*sendcounts_list,displs,MPI_Double,local_A,N*local_n, &
MPI_Double,0,MPI_COMM_WORLD,ierr)
deallocate(sendcounts_list)
if(myrank==0) then
deallocate(displs)
endif
allocate(local_A_Matrix(local_n,N))
local_A_Matrix=reshape(local_A,(/local_n,N/),order=(/2,1/))
deallocate(local_A)
call MPI_Finalize(ierr)
end program crash
你的问题在于
call MPI_Scatterv(Transpose(B),N*sendcounts_list
线。列表相乘在 Fortran 中是合法的,但它并没有给出 MPI 在该本地中期望的那种东西。创建一个临时数组并将其作为内容,然后传递它。