考虑以下在 Fortran 中实现简单神经网络的方法:抽象多态类型
layer
type, abstract :: layer
real, allocatable :: A(:,:)
class(layer), pointer :: nextLayer => null()
class(layer), pointer :: prevLayer => null()
contains
procedure(updateLayerINTRFC), deferred :: update
end type
abstract interface
subroutine updateLayerINTRFC(self)
import :: layer
class(layer), intent(inout) :: self
end subroutine
end interface
封装了所有层必须具有的通用属性,即。内容(二维实数数组 A),提供新输入时更新层的规则以及指向相邻层(上一个和下一个)的指针。扩展基本
layer
类型是任意数量的特定图层类型,例如
type, extends(layer) :: layerA
! Weights determining the network behaviour
real, allocatable :: W(:,:)
contains
procedure :: update => updateA
end type
! to be located in the contains section of the module
subroutine updateA(self)
class(layerA), intent(inout) :: self
! The update rule has been simplified for clarity
self%A = matmul(self%W,self%prevLayer%A)
end subroutine
还有一种特殊的
layerFirst
用于网络的第一层
type, extends(layer) :: layerFirst
integer :: nLayers ! number of layers in a network
integer, allocatable :: nCells(:) ! number of cells in each layer
contains
procedure :: forwardFull
procedure :: update => updateFirst
end type
作为整个网络的句柄并描述其维度。网络由以
layerFirst
实例开头的层链表组成。该列表通常是异构的,这排除了使用类型层数组的可能性(我知道通过引入中间包装类型来解决方法,但更喜欢指针方法)。只需遍历列表并更新每一层即可执行前向传递(即从输入获取输出)
subroutine forwardFull(self,input,output)
class(layerFirst), intent(inout) :: self
real, intent(in) :: input(self%nCells(1))
real, intent(out) :: output(self%nCells(self%nLayers))
class(layer), pointer :: currentLayer
integer :: i
self%A(:,1) = input
currentLayer => self%nextLayer
do i=2,self%nLayers-1
call currentLayer%update()
currentLayer => currentLayer%nextLayer
end do
call currentLayer%update
output = reshape(currentLayer%A,shape=[self%nCells(self%nLayers)])
end subroutine
由于问题不是关于训练网络,因此可以随机生成权重以给出在 gfortran-10 下编译的最小工作示例
module utils
implicit none
type, abstract :: layer
real, allocatable :: A(:,:)
class(layer), pointer :: nextLayer => null()
class(layer), pointer :: prevLayer => null()
contains
procedure(updateLayerINTRFC), deferred :: update
end type
type, extends(layer) :: layerFirst
integer :: nLayers
integer, allocatable :: nCells(:)
contains
procedure :: forwardFull
procedure :: update => updateFirst
end type
type, extends(layer) :: layerA
real, allocatable :: W(:,:)
contains
procedure :: update => updateA
end type
abstract interface
subroutine updateLayerINTRFC(self)
import :: layer
class(layer), intent(inout) :: self
end subroutine
end interface
contains
! ignore this, first layer does not need to be updated
subroutine updateFirst(self)
class(layerFirst), intent(inout) :: self
! Do nothing
end subroutine
subroutine updateA(self)
class(layerA), intent(inout) :: self
! the update rule has been simplified for clarity
self%A = matmul(self%W,self%prevLayer%A)
end subroutine
subroutine forwardFull(self,input,output)
class(layerFirst), intent(inout) :: self
real, intent(in) :: input(self%nCells(1))
real, intent(out) :: output(self%nCells(self%nLayers))
class(layer), pointer :: currentLayer
integer :: i
self%A(:,1) = input
currentLayer => self%nextLayer
do i=2,self%nLayers-1
call currentLayer%update()
currentLayer => currentLayer%nextLayer
end do
call currentLayer%update
output = reshape(currentLayer%A,shape=[self%nCells(self%nLayers)])
end subroutine
function newLayerA(ncurr,nprev) result(res)
type(layerA) :: res
integer, intent(in) :: ncurr, nprev
allocate(res%A(ncurr,1))
allocate(res%W(ncurr,nprev))
call random_number(res%A)
end function
function newNetwork() result(net)
type(layerFirst), target :: net
class(layer), pointer :: currentLayer
integer :: i
! network dimensions are hardcoded for simplicity
net%nLayers = 4
net%nCells = [784,128,64,10]
allocate(net%A(net%nCells(1),1))
allocate(net%nextLayer, source=newLayerA(net%nCells(2),net%nCells(1)))
currentLayer => net%nextLayer
currentLayer%prevLayer => net ! is net missing a target attribute
do i=2,net%nLayers-1
allocate(currentLayer%nextLayer, source=newLayerA(net%nCells(i+1),net%nCells(i)))
currentLayer%nextLayer%prevLayer => currentLayer
currentLayer => currentLayer%nextLayer
end do
end function
end module
program test
use utils
implicit none
type(layerFirst) :: net
real :: input(784,1), output(10,1)
call random_number(input)
net = newNetwork()
call net%forwardFull(input,output)
print '(" Most likely match : ", I0)', maxloc(output,1)-1
end program
但是,这似乎并没有正确地将第二层
prevLayer
指针关联到第一层,至少不一致。所有其他的都可以简单地检查,唯一的问题是图中标记为“不正常”的那一个
Ok Ok Ok
┌────────────────┐ ┌────────────────┐ ┌────────────────┐ ┌────────────────┐
│type(layerFirst)├─────►│ type(layerA) ├─────►│ type(layerA) ├─────►│ type(layerA) ├─────► Null()
│ :: net │ │ │ │ │ │ │
│ 784 cells │ │ 128 cells │ │ 64 cells │ │ 10 cells │
Null() ◄─────┤ │◄─────┤ │◄─────┤ │◄─────┤ │
└────────────────┘ └────────────────┘ └────────────────┘ └────────────────┘
Not ok Ok Ok
例如,从主程序调用
print*, shape(net%nextLayer%prevLayer%A)
会产生错误的输出 3784704 0
,而不是像 784 1
那样输出 print*, shape(net%A)
。有时,这(令人烦恼的不一致)会导致第二层的 updateA
中的矩阵乘法使程序崩溃。
是否存在一些与链接同一通用类的不同类型有关的深奥规则?我是否犯了一个错误(可能在函数
newNetwork
中)或者这是编译器问题?
更新:ifx错误消息指向
allocate(net%nextLayer, source=newLayerA(net%nCells(2),net%nCells(1)))
声称Uninitialized value was created by a heap allocation
线的方向,但我不知道该怎么做。完整消息已发送
==11782==WARNING: MemorySanitizer: use-of-uninitialized-value
#0 0x4a7ae6 in utils_mp_newnetwork_ /home/pavel/Dokumenty/meteorologie/otazka/src/neco.f90:93:13
#1 0x4aad5f in MAIN__ /home/pavel/Dokumenty/meteorologie/otazka/src/neco.f90:107:11
#2 0x40a768 in main (/home/pavel/Dokumenty/meteorologie/otazka/a.out+0x40a768) (BuildId: 344b775a27909a7ac2f3797db049781f62dcc8f5)
#3 0x7f5a69821c86 in __libc_start_main /build/glibc-CVJwZb/glibc-2.27/csu/../csu/libc-start.c:310
#4 0x40a619 in _start (/home/pavel/Dokumenty/meteorologie/otazka/a.out+0x40a619) (BuildId: 344b775a27909a7ac2f3797db049781f62dcc8f5)
Uninitialized value was created by a heap allocation
#0 0x418046 in malloc (/home/pavel/Dokumenty/meteorologie/otazka/a.out+0x418046) (BuildId: 344b775a27909a7ac2f3797db049781f62dcc8f5)
#1 0x50a454 in _mm_malloc (/home/pavel/Dokumenty/meteorologie/otazka/a.out+0x50a454) (BuildId: 344b775a27909a7ac2f3797db049781f62dcc8f5)
#2 0x4aeb23 in do_alloc_copy for_alloc_copy.c
#3 0x4addbd in do_for_alloc_copy for_alloc_copy.c
#4 0x4a40fc in utils_mp_newnetwork_ /home/pavel/Dokumenty/meteorologie/otazka/src/neco.f90:88:9
#5 0x4aad5f in MAIN__ /home/pavel/Dokumenty/meteorologie/otazka/src/neco.f90:107:11
#6 0x40a768 in main (/home/pavel/Dokumenty/meteorologie/otazka/a.out+0x40a768) (BuildId: 344b775a27909a7ac2f3797db049781f62dcc8f5)
#7 0x7f5a69821c86 in __libc_start_main /build/glibc-CVJwZb/glibc-2.27/csu/../csu/libc-start.c:310
SUMMARY: MemorySanitizer: use-of-uninitialized-value /home/pavel/Dokumenty/meteorologie/otazka/src/neco.f90:93:13 in utils_mp_newnetwork_
Exiting
主程序中的这一行有一个问题:
net = newNetwork()
以及
newNetwork()
函数中的这个:
currentLayer%prevLayer => net ! is net missing a target attribute
net
在newNetwork()
中声明为局部对象,与主程序中声明的net
不同。这意味着您指向的东西在函数返回后就会消失。
有几种解决方案可以使其发挥作用:
newNetwork()
声明为 firstLayer
类的类型绑定过程,并从主程序中调用它:call net%netNetwork()
type(layerFirst) :: net
net => newNetwork()
type(layerFirst), pointer :: net
allocate(net)
net
指针也会消失,但main的net
指针最终会指向同一个内存区域。也许还有其他问题,应该先解决这个问题。