TensorFlow: cosine-difference objective function constant throughout training


The example below is a simplified version of what I am working on. I am trying to train a neural network that minimises a cosine distance. The reason I implement my own cosine-difference loss function rather than using TensorFlow's built-in one is that in the full version of my project the built-in does not do exactly what I need (although in this simplified version the two are equivalent).
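
For reference, in this simplified case the hand-rolled loss and TensorFlow's built-in one should agree. A minimal sketch of that comparison (TF 1.x; depending on the version, the tf.losses.cosine_distance argument may be called dim rather than axis):

import tensorflow as tf

a = tf.constant([[1.0, 0.0]])
b = tf.constant([[0.0, 2.0]])

# hand-rolled: 1 - cosine similarity of the unit-normalised rows
custom = 1.0 - tf.reduce_sum(tf.nn.l2_normalize(a, 1) * tf.nn.l2_normalize(b, 1), axis=1)

# built-in equivalent; tf.losses.cosine_distance expects unit-normalised inputs
builtin = tf.losses.cosine_distance(tf.nn.l2_normalize(a, 1),
                                    tf.nn.l2_normalize(b, 1), axis=1)

with tf.Session() as sess:
    print(sess.run([custom, builtin]))  # both give 1.0 for orthogonal vectors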

I feed two orthogonal vectors (A and B) into the network and try to reduce the cosine distance between A and B. This is done by minimising a loss function which also includes a component that preserves the length of vector B as it is transformed. Ultimately, the output should be a vector with the same direction as vector A and the length of vector B.
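
For the concrete vectors used further down (A = [1, 0] and B = [0, 2]), the ideal output would therefore be [2, 0]: the direction of A scaled to the length of B. A quick numpy sketch of that target:

import numpy as np

A = np.array([1.0, 0.0])
B = np.array([0.0, 2.0])

# target: direction of A, length of B
target = A / np.linalg.norm(A) * np.linalg.norm(B)
print(target)  # [2. 0.]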

The problem I am having is that the network output, "vector_B_transformed", never changes. The loss function I have built is also constant throughout training. I have tried initialising the weights in different ways, but that did not help. I do not use a ReLU on the final layer of the fully connected network, and I have also tried ReLU activations on the hidden layers, but it does not seem to make any difference.

I crudely append the results to lists and print them to the terminal. The number of epochs is reduced to 200 here, but the same problem occurs when it is increased.

If anyone could help I would really appreciate it, as I am quite stuck.

from __future__ import division
import math
import numpy as np
import tensorflow as tf
import tensorflow.contrib.slim as slim
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops

# from utils import *

##### New Helper Functions

# weight and bias wrappers
def weight_variable(name, shape):
    """
    Create a weight variable with appropriate initialization
    :param name: weight name
    :param shape: weight shape
    :return: initialized weight variable
    """
    initer = tf.truncated_normal_initializer(stddev=0.01)
    return tf.get_variable('W_' + name,
                           dtype=tf.float32,
                           shape=shape,
                           initializer=initer)

def bias_variable(name, shape):
    """
    Create a bias variable with appropriate initialization
    :param name: bias variable name
    :param shape: bias variable shape
    :return: initialized bias variable
    """
    initial = tf.constant(0., shape=shape, dtype=tf.float32)
    return tf.get_variable('b_' + name,
                           dtype=tf.float32,
                           initializer=initial)


def fc_layer(x, num_units, name, use_relu=True):
    """
    Create a fully-connected layer
    :param x: input from previous layer
    :param num_units: number of hidden units in the fully-connected layer
    :param name: layer name
    :param use_relu: boolean to add ReLU non-linearity (or not)
    :return: The output array
    """
    in_dim = x.get_shape()[1]
    W = weight_variable(name, shape=[in_dim, num_units])
    b = bias_variable(name, [num_units])
    layer = tf.matmul(x, W)
    layer += b
    if use_relu:
        layer = tf.nn.relu(layer)
    return layer
## loss function
def cosine_distance_simple(A, B):

    normalize_A = tf.nn.l2_normalize(A,1)        
    normalize_B = tf.nn.l2_normalize(B,1)

    distance_matrix = 1 - tf.matmul(normalize_A, normalize_B, transpose_b=True)

    distance_matrix = tf.diag_part(distance_matrix)

    distance = tf.reduce_sum(distance_matrix)

    return distance

def maintain_length(A, B):

    return (tf.norm(A) - tf.norm(B))


# generator network without residual block
def generator(vector, reuse=False, name="generator"):

    with tf.variable_scope(name):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        else:
            assert tf.get_variable_scope().reuse is False

        output_dimension = vector.shape[1]

        e1 = fc_layer(vector, 2, name='g_e1', use_relu=False)
        e2 = fc_layer(e1, 4, name='g_e2', use_relu=False)    
        e3 = fc_layer(e2, 8, name='g_e3', use_relu=False)
        e4 = fc_layer(e3, 16, name='g_e4', use_relu=False)
        e5 = fc_layer(e4, 16, name='g_e5', use_relu=False)
        e6 = fc_layer(e5, 8, name='g_e6', use_relu=False)
        e7 = fc_layer(e6, 4, name='g_e7', use_relu=False)
        e8 = fc_layer(e7, output_dimension, name='g_e8', use_relu=False)


        return e8

import os
import time
from glob import glob
from collections import namedtuple
from sklearn.model_selection import train_test_split

# from module import *
# from utils import *

class cosine_diff_test(object):
    def __init__(self, sess, args):
        # initialise tensorflow session
        self.sess = sess

        # data, test, train splits
        self.data_A = args.vA
        self.data_B = args.vB

        self.generator = generator

        # when an instance of this class is created, _build_model is called automatically
        self._build_model()


    def _build_model(self):

        #### INPUTS TO NETWORKS
        # placeholder for vectors
        self.vector_A = tf.placeholder(tf.float32,
                                     [None, 2],
                                     name='vector_A')
        self.vector_B = tf.placeholder(tf.float32,
                                    [None, 2],
                                    name='vector_B')

        # FCNN to determine vector move required
        self.vector_B_ = self.generator(self.vector_B, False, name="generatorB")

        # minimise cos_dist between A and B while keeping A same

        self.loss = cosine_distance_simple(self.vector_A, self.vector_B_) \
                        + maintain_length(self.vector_B, self.vector_B_)
        '''

        self.loss = abs_criterion(self.vector_A, self.vector_A_) \
                + abs_criterion(self.vector_B, self.vector_B_)
        '''

        # trainable variables
        t_vars = tf.trainable_variables()

        # training variables for generator
        self.g_vars = [var for var in t_vars if 'generator' in var.name]




    def train(self, args):
        # placeholder for learning rate
        self.lr = tf.placeholder(tf.float32, None, name='learning_rate')

        # define optimizer
        self.optim = tf.train.AdamOptimizer(self.lr, beta1=args.beta1).minimize(self.loss, var_list=self.g_vars)



        # initialise global variables and run session
        init_op = tf.global_variables_initializer()

        self.sess.run(init_op)

        lr = args.lr

        # Import Data
        vecA = self.data_A.copy()
        vecB = self.data_B.copy()

        results_loss = []
        results_vector_B_transformed = []

        # iterate over the number of epochs defined
        for epoch in range(args.epoch):

            # Update 
            vector_B_transformed, _ = self.sess.run(
                [self.vector_B_, self.loss],
                feed_dict={self.vector_A: vecA, 
                           self.vector_B: vecB,  
                           self.lr: lr})

            results_loss.append(_)
            results_vector_B_transformed.append(vector_B_transformed)

        print(results_loss)
        print(results_vector_B_transformed)



        origin = args.orig
        print('plotting ...')
        plt.xlim((-0.5,1.5));
        plt.ylim((-0.5,2.5));
        plt.quiver(*origin, vecA, vector_B_transformed, 
                   color=['r','b'],angles='xy', scale_units='xy', scale=1);


class Args():
    A_vec = np.array([1, 0]).reshape(1,-1)
    B_vec = np.array([0, 2]).reshape(1,-1)
    ori = np.array([0, 0]).reshape(1,-1)


    epoch = 200
    lr = 0.0002
    vA = A_vec
    vB = B_vec
    beta1 = 0.5
    orig = ori

args = Args()
# TRAIN
tf.reset_default_graph()

tfconfig = tf.ConfigProto(allow_soft_placement=True)
tfconfig.gpu_options.allow_growth = True
with tf.Session(config=tfconfig) as sess:
    model = cosine_diff_test(sess, args)
    model.train(args) 
1 Answer

OK, so I found the problem, and in the end it was a simple mistake:

My optimizer was not included in my update step:

            # before: the optimizer op is never passed to sess.run, so the
            # weights are never updated and the loss stays constant
            vector_B_transformed, _ = self.sess.run(
                [self.vector_B_, self.loss],
                feed_dict={self.vector_A: vecA, 
                           self.vector_B: vecB,  
                           self.lr: lr})

            # after: include self.optim in the run call so a training step happens
            vector_B_transformed, _, loss = self.sess.run(
                [self.vector_B_, self.optim, self.loss],
                feed_dict={self.vector_A: vecA, 
                           self.vector_B: vecB,  
                           self.lr: lr})

My code still does not work as intended, but at least it is now trying to optimise something, so that's progress!
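
A quick way to confirm the optimizer is now actually updating the network is to watch one generator weight change across a step. A minimal sketch, run inside the same session after model.train(args), and assuming the generatorB variable scope used above:

# pick one trainable weight from the generatorB scope and check that it moves
w = [v for v in tf.trainable_variables() if v.name.startswith('generatorB/')][0]
before = sess.run(w)
sess.run(model.optim, feed_dict={model.vector_A: args.vA,
                                 model.vector_B: args.vB,
                                 model.lr: args.lr})
after = sess.run(w)
print(np.abs(after - before).max())  # non-zero once self.optim is in the run call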
