我目前正在使用 torch 来训练多线程深度学习网络,我需要注册一个信号处理程序,以便在我按下
ctrl+c
时可以保存网络的权重。但是问题来了,我发现多线程程序只是忽略了信号,我完全不知道为什么......
见下方代码
-- threadTester.lua
local classic = require 'classic'
local threads = require 'threads'
threads.Threads.serialization('threads.sharedserialize')
local tds = require 'tds'
--- threadTester: a class created with classic.class whose instances own a
-- threads.Threads pool stored in self.game.
local threadTester = classic.class('threadTester')
--- Constructor: builds the inner pool and marks the instance strict.
-- @param atomic object exposing :get(); it is captured by the pool's init
--   callback below (presumably the tds.AtomicCounter shared by the caller --
--   TODO confirm against the calling script).
function threadTester:_init(atomic)
-- threads.Threads is called with a count of 1 and an init callback.
self.game = threads.Threads(1,
function ()
-- Assigned without `local` on purpose: the job queued in play() reads the
-- same global name `inner`.
inner = atomic
end)
classic.strict(self)
end
--- Queues a job on self.game that loops until inner:get() returns a negative
-- value. The job is only queued here; this method never calls synchronize().
function threadTester:play()
self.game:addjob(function ()
-- Busy-wait: there is no sleep or yield between polls of the counter.
while true do
if inner:get() < 0 then break end
end
end)
-- do some stuff outside
end
return threadTester
和
local threads = require 'threads'
threads.Threads.serialization('threads.sharedserialize')
local tds = require 'tds'
local signal = require 'posix.signal'
-- Driver script, variant 1: the SIGINT handler is installed from inside a job
-- that runs on a separate pool (ctrlPool) rather than in the main script.

-- Note: `ctrlpool` (lower-case p) is a different variable from `ctrlPool`
-- below and is never referenced again in this script.
local ctrlpool = threads.Threads(1, function ()
local tds = require 'tds'
end)
-- Counter used as the stop flag: starts at 1, and the signal handler sets it
-- to -1.
local atomic = tds.AtomicCounter()
atomic:set(1)
-- Assigned without `local`, so this is a global in the main script.
nThreads = 4
local ctrlPool = threads.Threads(1)
-- The handler is registered by a job executed by ctrlPool.
-- NOTE(review): luaposix documents that handlers run from a debug hook of the
-- interpreter; if each pool thread has its own Lua state, this handler is tied
-- to the ctrlPool worker, which runs no further Lua code after this job --
-- verify, as this would explain why Ctrl+C appears to be ignored.
ctrlPool:addjob(function ()
local signal = require 'posix.signal'
signal.signal(signal.SIGINT, function(signum)
print('\nSIGINT received')
print('Ex(c)iting')
atomic:set(-1)
end)
end)
ctrlPool:synchronize()
-- Pool of nThreads workers; the init callback stores the class and one
-- instance in the globals `threadTester` and `player` for the jobs below.
local gamePool = threads.Threads(nThreads, function ()
threadTester = require 'threadTester'
player = threadTester(atomic)
end)
-- Queue one job per worker; each calls player:play() under xpcall.
for i = 1, nThreads do
gamePool:addjob(function ()
print(string.format("begin in thread %d", __threadid))
-- `player` is passed as the extra xpcall argument so play receives it as self.
local status, err = xpcall(player.play, debug.traceback, player)
if not status then
-- On error, print the traceback and exit the process with status 128.
print(string.format('%s', err))
os.exit(128)
end
end)
end
gamePool:synchronize()
gamePool:terminate()
macOS 10.12,torch 7,调用
th test.lua
,然后按ctrl+c
,没有任何反应。
但是如果我改变test.lua
如下
local threads = require 'threads'
threads.Threads.serialization('threads.sharedserialize')
local tds = require 'tds'
local signal = require 'posix.signal'
-- Driver script, variant 2: the control-pool lines are commented out, so the
-- SIGINT handler is installed directly by the main script.
-- local ctrlpool = threads.Threads(1, function ()
-- local tds = require 'tds'
-- end)
-- Counter used as the stop flag: starts at 1, and the signal handler sets it
-- to -1.
local atomic = tds.AtomicCounter()
atomic:set(1)
-- Assigned without `local`, so this is a global in the main script.
nThreads = 4
-- local ctrlPool = threads.Threads(1)
-- ctrlPool:addjob(function ()
-- Re-declares the local `signal`; with the wrapper above commented out, this
-- now runs at the top level of the script.
local signal = require 'posix.signal'
signal.signal(signal.SIGINT, function(signum)
print('\nSIGINT received')
print('Ex(c)iting')
atomic:set(-1)
end)
-- end)
-- ctrlPool:synchronize()
-- Pool of nThreads workers; the init callback stores the class and one
-- instance in the globals `threadTester` and `player` for the jobs below.
local gamePool = threads.Threads(nThreads, function ()
threadTester = require 'threadTester'
player = threadTester(atomic)
end)
-- Queue one job per worker; each calls player:play() under xpcall.
for i = 1, nThreads do
gamePool:addjob(function ()
print(string.format("begin in thread %d", __threadid))
-- `player` is passed as the extra xpcall argument so play receives it as self.
local status, err = xpcall(player.play, debug.traceback, player)
if not status then
-- On error, print the traceback and exit the process with status 128.
print(string.format('%s', err))
os.exit(128)
end
end)
end
gamePool:synchronize()
gamePool:terminate()
一切如预期
> th test.lua
begin in thread 1
begin in thread 2
begin in thread 3
begin in thread 4
^C
SIGINT received
Ex(c)iting
但是如果我使用
qlua
>qlua test.lua
begin in thread 1
begin in thread 2
begin in thread 3
begin in thread 4
^C^C^C^C^C^C^Z
[3] + 15370 suspended qlua test.lua
又失败了... 我很困惑...我不知道为什么它在
qlua
中不起作用,而且我确实看到有人在 ctrl 池中注册了信号处理程序...
如果我像这样改变
threadTester.lua
local classic = require 'classic'
local threads = require 'threads'
threads.Threads.serialization('threads.sharedserialize')
local tds = require 'tds'
--- threadTester (second version): the polling loop runs in the caller of
-- play(), while the inner pool's job only shells out to `sleep`.
local threadTester = classic.class('threadTester')
--- Constructor: builds the inner pool, keeps a reference to the counter on
-- the instance, and marks the instance strict.
-- @param atomic object exposing :get(); stored as self.atomic and also
--   captured by the pool's init callback (presumably a tds.AtomicCounter --
--   TODO confirm against the calling script).
function threadTester:_init(atomic)
self.game = threads.Threads(1,
function ()
-- Assigned without `local`; nothing in this version of the file reads `inner`.
inner = atomic
end)
self.atomic = atomic
classic.strict(self)
end
--- Queues a job that runs the shell command "sleep 10", then spins in the
-- calling context until self.atomic:get() returns a negative value.
function threadTester:play()
self.game:addjob(function ()
os.execute("sleep ".. 10)
end)
-- do some stuff outside
-- Busy-wait: there is no sleep or yield between polls of the counter.
while true do
if self.atomic:get() < 0 then break end
end
end
return threadTester
它的行为变得更加奇怪(使用不带控制池的
test.lua
)
th test.lua
begin in thread 1
begin in thread 2
begin in thread 3
begin in thread 4
^C^C^C^C^C
SIGINT received
Ex(c)iting
好像
os.execute(something)
结束后才收到信号
我把答案放在这里,以防其他人遇到这个问题。你所遇到的行为正是 luaposix 文档中提到的: https://luaposix.github.io/luaposix/modules/posix.signal.html
注意 posix.signal.signal 是用 sigaction(2) 实现的,以实现跨平台的一致语义。另请注意,已安装的信号处理程序不会在信号出现时立即调用。相反,为了保持解释器状态干净,它们在调试钩子的上下文中执行:一旦解释器进入新函数、从当前正在执行的函数返回,或当前指令执行结束后,调试钩子就会被调用。
所以是的,您的信号处理程序仅在
os.execute
返回后运行。