TensorFlow 2：如何为具有预定义稀疏权重结构的自定义层实现自定义梯度（gradient）？

Question

我的目标是在 TensorFlow 2 中创建一个具有预定义的、固定的稀疏权重结构的自定义层。出于内存方面的考虑，需要将权重矩阵以可训练的 SparseTensor 的形式保存。

到目前为止，我的解决方案是：把 SparseTensor 的权重值作为标准的可训练变量（"self.w"）保存在自定义层中，并使用一个带自定义梯度的函数（"matmul_dense_sparse"）来处理稀疏矩阵运算和反向传播。

我的问题是：如何在这种稀疏网络结构上使用自定义梯度进行反向传播？

下面是一个简化的示例（TF 2.1.0；已启用 Eager Execution，即即时执行模式）。

import numpy as np
import tensorflow as tf
tf.__version__
# 2.1.0


@tf.custom_gradient
def matmul_dense_sparse(dense, sparse_mat):
    """Multiply a dense matrix by a sparse matrix, with a hand-written gradient.

    The product ``dense @ sparse_mat`` is evaluated as
    ``(sparse_mat^T @ dense^T)^T`` so that the sparse operand is the first
    argument of ``tf.sparse.sparse_dense_matmul``.

    Args:
        dense: Dense 2-D tensor, the left operand
            (assumed ``(batch, n_in)`` -- TODO confirm against the caller).
        sparse_mat: ``tf.sparse.SparseTensor``, the right operand
            (assumed ``(n_in, n_out)``). It must stay sparse: its ``indices``
            are read and it is passed to ``tf.sparse.transpose``.

    Returns:
        A ``(res, grad_fn)`` pair as expected by ``tf.custom_gradient``:
        ``res`` is the dense product and ``grad_fn`` maps the upstream
        gradient to ``(grad_dense, grad_values)``, where ``grad_values`` holds
        one entry per stored index of ``sparse_mat``.
    """
    # NOTE: do not densify `sparse_mat` here; a dense tensor has no `.indices`
    # and cannot be passed to `tf.sparse.transpose`.
    sparse_indices = sparse_mat.indices
    ta = tf.transpose(dense)
    b = sparse_mat
    tb = tf.sparse.transpose(b)
    res = tf.transpose(tf.sparse.sparse_dense_matmul(tb, ta))

    def grad_fn(grad_res):
        """Return the gradients w.r.t. `dense` and the stored sparse values."""
        print(grad_res)  # debug output kept from the original snippet
        tgrad = tf.transpose(grad_res)
        # d(res)/d(dense): grad_res @ sparse^T, computed as (sparse @ grad_res^T)^T.
        grad_dense = tf.transpose(tf.sparse.sparse_dense_matmul(b, tgrad))
        # For every stored entry (i, j): sum over rows of dense[:, i] * grad_res[:, j].
        dense_edge_starts = tf.gather(dense, sparse_indices[:, 0], axis=1)
        grad_res_edge_ends = tf.gather(grad_res, sparse_indices[:, 1], axis=1)
        grad_values = tf.reduce_sum(
            tf.multiply(dense_edge_starts, grad_res_edge_ends), axis=0)
        return grad_dense, grad_values

    return res, grad_fn


# custom layer with sparse weight architecture
class SparseLinear(tf.keras.layers.Layer):
    """Linear layer whose kernel is a sparse matrix with a fixed sparsity pattern.

    Only the values stored at ``indices`` are trainable (``self.w``); the
    positions themselves never change. The forward pass delegates to
    ``matmul_dense_sparse``.

    Args:
        indices: Array of index pairs of the non-zero kernel entries; its
            first dimension gives the number of trainable values.
        units: Number of output units (second dimension of the kernel).
    """

    def __init__(self, indices, units=32):
        super().__init__()
        self.indices = indices
        self.units = units

    def build(self, input_shape):
        # One trainable scalar per stored (non-zero) kernel entry.
        n_values = self.indices.shape[0]
        self.w = self.add_weight(
            shape=(n_values,),
            initializer='random_normal',
            trainable=True,
        )

        # Assemble the sparse kernel once, at build time, from the trainable values.
        kernel = tf.sparse.SparseTensor(
            indices=self.indices,
            values=self.w,
            dense_shape=[input_shape[-1], self.units],
        )
        self.sparse_mat = tf.sparse.reorder(kernel)

    def call(self, x):
        return matmul_dense_sparse(x, self.sparse_mat)


# Index pairs of the non-zero weights in the SparseTensor; handed to
# SparseLinear below as `indices` (one trainable value per pair).
sp_idxs  = np.array([[0, 2],[1, 2], [8, 1], [9, 3], [10, 5]])

# Random toy data: 20 samples with 100 features each, and one target per sample.
train_x = np.random.rand(20, 100)
train_y = np.random.rand(20, 1)

# build keras model: sparse layer -> Dense(12) -> Dense(1), all linear
n_features = train_x.shape[1]
inputs     = tf.keras.Input(shape=(n_features,), name='snp_input')
layers     = SparseLinear(indices=sp_idxs, units=32)(inputs)
layers     = tf.keras.layers.Dense(12, activation='linear')(layers)
layers     = tf.keras.layers.Dense(1, activation='linear')(layers)
model      = tf.keras.Model(inputs=inputs, outputs=layers, name='model')

# Compile with MSE loss and RMSprop (learning rate 0.001), then fit on the toy
# data and read back the weights.
optimizer = tf.keras.optimizers.RMSprop(0.001)
model.compile(loss='mse',
              optimizer=optimizer,
              metrics=['mae', 'mse'])
history = model.fit(train_x, train_y)
weights = model.get_weights()

以及相应的错误信息：

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-11-34c364b12112> in <module>()
     51 n_features = train_x.shape[1]
     52 inputs     = tf.keras.Input(shape=(n_features,), name='snp_input')
---> 53 layers     = Linear(indices=sp_idxs, units=32)(inputs)
     54 layers     = tf.keras.layers.Dense(12, activation='linear')(layers)

~/.local/lib/python3.6/site-packages/tensorflow_core/python/keras/engine/base_layer.py in __call__(self, inputs, *args, **kwargs)
    771                     not base_layer_utils.is_in_eager_or_tf_function()):
    772                   with auto_control_deps.AutomaticControlDependencies() as acd:
--> 773                     outputs = call_fn(cast_inputs, *args, **kwargs)
    774                     # Wrap Tensors in `outputs` in `tf.identity` to avoid
    775                     # circular dependencies.

~/.local/lib/python3.6/site-packages/tensorflow_core/python/autograph/impl/api.py in wrapper(*args, **kwargs)
    235       except Exception as e:  # pylint:disable=broad-except
    236         if hasattr(e, 'ag_error_metadata'):
--> 237           raise e.ag_error_metadata.to_exception(e)
    238         else:
    239           raise

TypeError: in converted code:

    <ipython-input-11-34c364b12112>:37 call  *
        return matmul_dense_sparse(x, self.sparse_mat)
    /home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/custom_gradient.py:256 __call__
        return self._d(self._f, a, k)
    /home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/custom_gradient.py:212 decorated
        return _graph_mode_decorator(wrapped, args, kwargs)
    /home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/custom_gradient.py:307 _graph_mode_decorator
        args = [ops.convert_to_tensor(x) for x in args]
    /home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/custom_gradient.py:307 <listcomp>
        args = [ops.convert_to_tensor(x) for x in args]
    /home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/ops.py:1314 convert_to_tensor
        ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
    /home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py:317 _constant_tensor_conversion_function
        return constant(v, dtype=dtype, name=name)
    /home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py:258 constant
        allow_broadcast=True)
    /home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/constant_op.py:296 _constant_impl
        allow_broadcast=allow_broadcast))
    /home/kma52/.local/lib/python3.6/site-packages/tensorflow_core/python/framework/tensor_util.py:547 make_tensor_proto
        "supported type." % (type(values), values))

    TypeError: Failed to convert object of type <class 'tensorflow.python.framework.sparse_tensor.SparseTensor'> to Tensor. Contents: SparseTensor(indices=tf.Tensor(
    [[ 0  2]
     [ 1  2]
     [ 8  1]
     [ 9  3]
     [10  5]], shape=(5, 2), dtype=int64), values=tf.Tensor([ 0.03837506 -0.07365214 -0.02256368 -0.05631712  0.05937713], shape=(5,), dtype=float32), dense_shape=tf.Tensor([100  32], shape=(2,), dtype=int64)). Consider casting elements to a supported type.

这段代码试图实现此前几个讨论帖中的思路：帖子 #1、帖子 #2、帖子 #3。

Answer 1

发一个看起来效果不错的解决方案。

class SparseLinear(tf.keras.layers.Layer):
    """Linear layer with a fixed sparsity pattern and trainable non-zero values.

    The sparse kernel is rebuilt from ``self.w`` inside ``call`` and multiplied
    using ``tf.sparse.sparse_dense_matmul`` directly, so no custom gradient
    function is involved.

    Args:
        indices: Sequence of index pairs of the non-zero kernel entries; its
            length gives the number of trainable values.
        units: Number of output units (second dimension of the kernel).
    """

    def __init__(self, indices, units=32):
        super(SparseLinear, self).__init__()
        self.units = units
        self.indices = indices

    def build(self, input_shape):
        # Remember the input width; it is the first dimension of the kernel.
        self.n_feat = input_shape[-1]
        # One trainable scalar per stored (non-zero) kernel entry.
        self.w = self.add_weight(shape=(len(self.indices),),
                                 initializer=tf.keras.initializers.he_normal(),
                                 trainable=True)

    def call(self, x):
        # Build the kernel on every call so its values are read from self.w
        # during the forward pass rather than captured once at build time.
        kernel = tf.SparseTensor(self.indices, self.w, [self.n_feat, self.units])
        return tf.sparse.sparse_dense_matmul(x, kernel)

Tensorflow 2;如何创建具有预定义稀疏权重架构的自定义层gradient？

问题描述投票：0回答：1

1个回答

最新问题

Tensorflow 2;如何创建具有预定义稀疏权重架构的自定义层gradient？

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1