ValueError: Layer "model_69" expects 3 input(s), but it received 96 input tensors


I am trying to avoid calling model.predict() or model.fit() inside a for loop in order to speed up training, so I tried to apply this solution to my case, but I get an error. The model has three inputs.

Here is my code:

import random
import time
from collections import deque

import numpy as np
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# REPLAY_MEMORY_SIZE, MIN_REPLAY_MEMORY_SIZE, DISCOUNT and env are defined elsewhere
n_possible_movements = 9
MINIBATCH_SIZE = 32

class DQNAgent(object):
    def __init__(self):
        self.epsilon = 1.0
        self.epsilon_decay = 0.8
        self.epsilon_min = 0.1
        self.learning_rate = 10e-4
        self.tau = 1e-3
        
                
        # Main models
        self.model_uav_pos = self._build_pos_model()

        # Target networks
        self.target_model_uav_pos = self._build_pos_model()
        # Copy weights
        self.target_model_uav_pos.set_weights(self.model_uav_pos.get_weights())

        # An array with last n steps for training
        self.replay_memory_pos_nn = deque(maxlen=REPLAY_MEMORY_SIZE)
        
    def _build_pos_model(self): # compile the DNN
        # create the DNN model
        dnn = self.create_pos_dnn()
        
        opt = Adam(learning_rate=self.learning_rate) #, decay=self.epsilon_decay)
        dnn.compile(loss="mse", optimizer=opt)
        
        return dnn
    
    def create_pos_dnn(self): 
        # initialize the input shape
        pos_input_shape = (2,)
        requests_input_shape = (len(env.ues),)
        number_of_satisfied_ues_input_shape = (1,)
        # How many possible outputs we can have
        output_nodes = n_possible_movements
        
        # Initialize the inputs
        uav_current_position = Input(shape=pos_input_shape, name='pos')
        ues_requests = Input(shape=requests_input_shape, name='requests')
        number_of_satisfied_ues = Input(shape=number_of_satisfied_ues_input_shape, name='number_of_satisfied_ues')
        
        # Put them in a list
        list_inputs = [uav_current_position, ues_requests, number_of_satisfied_ues]
        
        # Merge all input features into a single large vector
        x = layers.concatenate(list_inputs)
        
        # Add a 1st Hidden (Dense) Layer
        dense_layer_1 = Dense(512, activation="relu")(x)
        
        # Add a 2nd Hidden (Dense) Layer
        dense_layer_2 = Dense(512, activation="relu")(dense_layer_1)
        
        # Add a 3rd Hidden (Dense) Layer
        dense_layer_3 = Dense(256, activation="relu")(dense_layer_2)
        
        # Output layer
        output_layer = Dense(output_nodes, activation="linear")(dense_layer_3)

        model = Model(inputs=list_inputs, outputs=output_layer)
                        
        # return the DNN
        return model
    
    def remember_pos_nn(self, state, action, reward, next_state, done):
        self.replay_memory_pos_nn.append((state, action, reward, next_state, done)) # list of previous experiences, enabling re-training later
        
    def act_upon_choosing_a_new_position(self, state): # state is a tuple (uav_position, requests_array, number_satisfaction)
        if np.random.rand() <= self.epsilon: # if acting randomly, take random action
            return random.randrange(n_possible_movements)
        pos =  np.array([state[0]])
        reqs =  np.array([state[1]])
        number_satisfaction = np.array([state[2]])
        act_values = self.model_uav_pos.predict([pos, reqs, number_satisfaction]) # if not acting randomly, predict reward value based on current state
        return np.argmax(act_values[0]) 

    def target_train(self):
        weights = self.model_uav_pos.get_weights()
        target_weights = self.target_model_uav_pos.get_weights()
        for i in range(len(target_weights)):
            target_weights[i] = weights[i] * self.tau + target_weights[i] * (1 - self.tau)
        self.target_model_uav_pos.set_weights(target_weights)

Here is the training function before I introduced the changes suggested in the linked solution:

def train_pos_nn(self):
        print("In Training..")

        # Start training only if certain number of samples is already saved
        if len(self.replay_memory_pos_nn) < MIN_REPLAY_MEMORY_SIZE:
            print("Exiting Training: Replay Memory Not Full Enough...")
            return

        # Get a minibatch of random samples from memory replay table
        list_memory = list(self.replay_memory_pos_nn)
        random.shuffle(list_memory)
        minibatch = random.sample(list_memory, MINIBATCH_SIZE)

        start_time = time.time()
        # Enumerate our batches
        for index, (current_state, action, reward, new_current_state, done) in enumerate(minibatch):
            print('...Starting Training...')
            target = 0
            pos =  np.array([current_state[0]])
            reqs =  np.array([current_state[1]])
            number_satisfaction = np.array([current_state[2]])
            pos_next = np.array([new_current_state[0]])
            reqs_next = np.array([new_current_state[1]])
            number_satisfaction_next = np.array([new_current_state[2]])
    
            # If not a terminal state, get new q from future states, otherwise set it to 0
            # almost like with Q Learning, but we use just part of equation here
            if not done:
                print("Predict Next State")
                target = reward + DISCOUNT * np.amax(self.target_model_uav_pos.predict([pos_next, reqs_next, number_satisfaction_next]))
            else:
                target = reward

            # Update Q value for given state
            print("Predict State")
            target_f = self.model_uav_pos.predict([pos, reqs, number_satisfaction])
            target_f = np.array(target_f)
            target_f[0][action] = target

            self.model_uav_pos.fit([pos, reqs, number_satisfaction], \
                                   target_f, \
                                   verbose=2, \
                                   shuffle=False, \
                                   callbacks=None, \
                                   epochs=1 \
                                  )  
        end_time = time.time()
        print("Time", end_time - start_time)
        # Update target network counter every episode
        self.target_train()

Here is the training function after I introduced the changes:

def train_pos_nn(self):
        print("In Training..")

        # Start training only if certain number of samples is already saved
        if len(self.replay_memory_pos_nn) < MIN_REPLAY_MEMORY_SIZE:
            print("Exiting Training: Replay Memory Not Full Enough...")
            return

        # Get a minibatch of random samples from memory replay table
        list_memory = list(self.replay_memory_pos_nn)
        random.shuffle(list_memory)
        # Draw a sample
        samples = random.sample(list_memory, MINIBATCH_SIZE)
        
        start_time = time.time()
        # Prepare the batch
        state, action, reward, new_state, done = zip(*samples)
        nstate = []
        cstate = []
        start_time_2 = time.time()
        for n_state in new_state:
            pos_next = np.array([n_state[0]])
            reqs_next = np.array([n_state[1]])
            number_satisfaction_next = np.array([n_state[2]])
            nstate.append([pos_next,reqs_next,number_satisfaction_next])
        for curr_state in state:
            pos =  np.array([curr_state[0]])
            reqs =  np.array([curr_state[1]])
            number_satisfaction = np.array([curr_state[2]])
            cstate.append([pos,reqs,number_satisfaction])
        end_time_2 = time.time()
        print("Time 2", end_time_2 - start_time_2)
        #next_state = np.concatenate(new_state)
        #next_state = np.concatenate(nstate)
        #print("next_state", nstate[0], "len", len(nstate))#np.asarray(nstate).shape)# np.shape(nstate))
        done = np.array(done)[:,None]
        state = np.concatenate(state)
        reward = np.array(reward)[:,None]
        q_future = self.target_model_uav_pos.predict(nstate)#np.vstack(nstate))
        targets = reward + DISCOUNT*np.max(q_future, axis=1, keepdims=True)
        
        # Fit the model
        self.model_uav_pos.fit(cstate, targets, epochs=1, verbose=2)
        
        end_time = time.time()
        print("Time", end_time - start_time)
        self.target_train()

This line

q_future = self.target_model_uav_pos.predict(nstate)
throws the error
ValueError: Layer "model_69" expects 3 input(s), but it received 96 input tensors
(each of the 32 samples in nstate has 3 inputs; I get the same error when I use predict_on_batch())

I don't know how to do this correctly. Any help would be appreciated.
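For reference, since the count works out to 32 samples × 3 inputs = 96, I assume predict() and fit() want a list with one batched array per named model input (each with a leading batch dimension of 32), not one list of 3 arrays per sample. Below is an untested sketch of that batching, starting from the raw state, action, reward, new_state, done tuples returned by zip(*samples); the *_batch variable names and the (1 - done) masking are my own, the masking just mirroring the "if not done" branch of the loop version:

# Untested sketch: one batched array per model input, instead of one list per sample
pos_next_batch = np.array([s[0] for s in new_state])                 # shape (32, 2)
reqs_next_batch = np.array([s[1] for s in new_state])                # shape (32, len(env.ues))
sat_next_batch = np.array([s[2] for s in new_state]).reshape(-1, 1)  # shape (32, 1)

q_future = self.target_model_uav_pos.predict([pos_next_batch, reqs_next_batch, sat_next_batch])

# Same layout for the current states
pos_batch = np.array([s[0] for s in state])
reqs_batch = np.array([s[1] for s in state])
sat_batch = np.array([s[2] for s in state]).reshape(-1, 1)

done = np.array(done, dtype=np.float32)[:, None]
reward = np.array(reward)[:, None]
# Zero out the bootstrap term for terminal transitions, like the "if not done" check above
targets = reward + DISCOUNT * (1 - done) * np.max(q_future, axis=1, keepdims=True)

# Overwrite only the Q-value of the action actually taken, as in the loop version
target_f = self.model_uav_pos.predict([pos_batch, reqs_batch, sat_batch])
target_f[np.arange(MINIBATCH_SIZE), np.array(action)] = targets[:, 0]

self.model_uav_pos.fit([pos_batch, reqs_batch, sat_batch], target_f, epochs=1, verbose=2)

The outer list passed to predict()/fit() here has length 3 (one entry per model input) instead of 32 (one per sample), which is what the error message seems to be complaining about.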

tensorflow keras deep-learning reinforcement-learning dqn