使用 TensorArray 存储中间结果时,出现"没有为任何变量提供梯度"的错误

问题描述 投票:0回答:1

为了练习,我正在研究 Keras 中的反馈循环自动编码器。我使用的代码是

import tensorflow as tf
import keras
import os

class Linear(keras.layers.Layer):
    """Dense layer without activation: returns ``inputs @ w + b``."""

    def __init__(self, units=32):
        super().__init__()
        self.units = units

    def build(self, input_shape):
        """Create the kernel ``w`` and the bias ``b`` for the given input shape."""
        # The kernel maps the last input dimension onto `units` outputs.
        in_features = input_shape[-1]
        self.w = self.add_weight(
            shape=(in_features, self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer="random_normal",
            trainable=True,
        )

    def call(self, inputs):
        """Apply the affine map to ``inputs``."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b
    


class FRAE(tf.keras.Model):
    """Feedback autoencoder prototype.

    Each sample is processed together with the output decoded for the
    previous sample: that output (``self.decoded``) is concatenated to the
    encoder input and again to the latent code before decoding.
    """

    def __init__(self):
        super(FRAE, self).__init__()
        # Encoder: linear_1 -> linear_2 -> latent (a single latent unit).
        self.linear_1 = Linear(4)
        self.linear_2 = Linear(3)
        self.latent   = Linear(1)
        # Decoder: linear_3 -> linear_4 (two output units).
        self.linear_3 = Linear(3)
        self.linear_4 = Linear(2)
        # Feedback state: the most recent decoder output, zeros at start.
        self.decoded  = tf.zeros(shape=(1, 2))
        
    def call(self, inputs):
        """Process the rows of ``inputs`` one by one and stack the outputs.

        The first dimension of ``inputs`` is iterated with a Python loop, so
        it has to be a concrete integer. Each step overwrites
        ``self.decoded`` with its own output.
        """
        #x = self.flatten(inputs)
        
        batch_size = inputs.shape[0]
        input_dim = inputs.shape[1]  # NOTE(review): never used below
        # output_list = [None] * batch_size #tf.zeros(shape = (batch_size, input_dim))
        output_list = tf.TensorArray(tf.float32, size=batch_size, clear_after_read=False)

        for i in range(batch_size):
            # Encoder input: current sample (as a 1-row batch) plus feedback.
            x = tf.concat((tf.expand_dims(inputs[i], axis=0),self.decoded),axis=1)
            x = self.linear_1(x)
            x = tf.nn.swish(x)
            x = self.linear_2(x)
            x = tf.nn.swish(x)
            x = self.latent(x)
            x = tf.nn.swish(x)
            # Decoder input: latent code plus the same feedback.
            x = tf.concat((x,self.decoded),axis=1)
            x = self.linear_3(x)
            x = tf.nn.swish(x)
            x = self.linear_4(x)
            x = tf.nn.swish(x)
            # Becomes the feedback for the next sample; it lives on the
            # instance, so it also carries over into the next call.
            self.decoded = tf.identity(x)
            # NOTE(review): the value returned by write() is discarded here.
            # The tf.TensorArray documentation asks callers to keep using the
            # returned TensorArray; this is presumably why stack() below is
            # not connected to the weights when traced as a graph -- confirm.
            output_list.write(i,  x)
        y = output_list.stack()
        return y


        
# Intended to hide the GPUs so that the example runs on the CPU.
# NOTE(review): set after `import tensorflow`; confirm it still takes effect.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# Ten random two-feature samples; the model is trained to reproduce them.
xtrain = tf.random.uniform(shape=(10, 2))
model = FRAE()
# One forward pass before compiling; `y` itself is not used afterwards.
y = model(xtrain)
# `learning_rate` is the supported keyword; the `lr` alias is deprecated
# and rejected by newer Keras releases.
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss="mse")
model.fit(x=xtrain, y=xtrain, epochs=50, batch_size=1)

当我运行这段代码时,出现错误

ValueError:没有为任何变量提供梯度(No gradients provided for any variable):(['frae_13/linear_65/Variable:0', 'frae_13/linear_65/Variable:0', 'frae_13/linear_66/Variable:0', 'frae_13/linear_66/Variable:0', 'frae_13/linear_67/Variable:0', 'frae_13/linear_67/Variable:0', 'frae_13/linear_68/Variable:0', 'frae_13/linear_68/Variable:0', 'frae_13/linear_69/Variable:0', 'frae_13/linear_69/Variable:0'])。提供的

grads_and_vars
是 ((None,), (None,), (None,), (None,), (None,), (None,), (None,), (None,), (None,), (None,))。

这可能是因为我使用了 TensorArray 来存储批次中各个样本的输出。不知为何,梯度丢失了,或者说无法被计算出来。

有人知道如何计算这种情况下的梯度吗?

我试着用谷歌搜索常见问题,但这一个比较特殊,所以我找到的解决方案并不是很有帮助。

python tensorflow keras gradient autoencoder
1个回答
0
投票

多亏了 Alberto 关于改用列表的评论,我解决了这个问题。

我是这样做的:

import tensorflow as tf
import keras
import os

class Linear(keras.layers.Layer):
    """Dense layer without activation: returns ``inputs @ w + b``."""

    def __init__(self, units=32):
        super().__init__()
        self.units = units

    def build(self, input_shape):
        """Create the kernel ``w`` and the bias ``b`` for the given input shape."""
        # The kernel maps the last input dimension onto `units` outputs.
        in_features = input_shape[-1]
        self.w = self.add_weight(
            shape=(in_features, self.units),
            initializer="random_normal",
            trainable=True,
        )
        self.b = self.add_weight(
            shape=(self.units,),
            initializer="random_normal",
            trainable=True,
        )

    def call(self, inputs):
        """Apply the affine map to ``inputs``."""
        projected = tf.matmul(inputs, self.w)
        return projected + self.b
    


class FRAE(tf.keras.Model):
    """Feedback autoencoder prototype that collects outputs in a Python list.

    Each sample is encoded together with the output decoded for the previous
    sample, and the latent code is decoded together with that same previous
    output. The previous output is kept in ``self.decoded`` as a plain nested
    list, so it persists between calls and re-enters the computation as a
    constant rather than as a differentiable tensor.
    """

    def __init__(self):
        super().__init__()
        # Encoder: linear_1 -> linear_2 -> latent (a single latent unit).
        self.linear_1 = Linear(4)
        self.linear_2 = Linear(3)
        self.latent = Linear(1)
        # Decoder: linear_3 -> linear_4 (two output units).
        self.linear_3 = Linear(3)
        self.linear_4 = Linear(2)
        # Feedback state: last decoder output as a 1 x 2 nested list.
        self.decoded = [[0, 0]]

    def call(self, inputs):
        """Process the rows of ``inputs`` one by one and stack the outputs.

        The first dimension of ``inputs`` is iterated with a Python loop, so
        it has to be a concrete integer. Returns the per-sample outputs
        stacked along a new leading axis.
        """
        batch_size = inputs.shape[0]
        outputs = []
        for i in range(batch_size):
            # Convert the stored feedback once per step and reuse it for
            # both the encoder input and the decoder input.
            feedback = tf.convert_to_tensor(self.decoded, dtype=tf.float32)
            sample = tf.expand_dims(inputs[i], axis=0)

            x = tf.concat((sample, feedback), axis=1)
            for layer in (self.linear_1, self.linear_2, self.latent):
                x = tf.nn.swish(layer(x))

            x = tf.concat((x, feedback), axis=1)
            for layer in (self.linear_3, self.linear_4):
                x = tf.nn.swish(layer(x))

            # Store this step's output as feedback for the next sample and
            # the next call. `.numpy()` is why the model has to be run
            # eagerly (see `run_eagerly=True` in the training script).
            self.decoded = x.numpy().tolist()
            outputs.append(x)
        return tf.stack(outputs)


        
# Intended to hide the GPUs so that the example runs on the CPU.
# NOTE(review): set after `import tensorflow`; confirm it still takes effect.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# Ten random two-feature samples; the model is trained to reproduce them.
xtrain = tf.random.uniform(shape=(10, 2))
model = FRAE()
# One forward pass before compiling; `y` itself is not used afterwards.
y = model(xtrain)
# `learning_rate` is the supported keyword; the `lr` alias is deprecated
# and rejected by newer Keras releases.
optimizer = keras.optimizers.Adam(learning_rate=0.001)
# Eager execution is required because FRAE.call converts tensors with .numpy().
model.compile(optimizer=optimizer, loss="mse", run_eagerly=True)
model.fit(x=xtrain, y=xtrain, epochs=50, batch_size=1)

这当然只是一个原型:对于随机数据,把输出反馈回网络不会带来任何好处;而对于前后样本相互关联的数据,这种反馈应该会有一定帮助。

© www.soinside.com 2019 - 2024. All rights reserved.