Keras fit和datagenerator返回 "检查输入时出错:预期输入_1有3个维度,但得到的是形状为(None, 1)的数组"

问题描述 投票:0回答:1

我正试图使用存储在pickle中的数据框架创建一个生成器。目标是3列,名称以 "target "开头。特征存储在 "input "列中,"input "的每个单元格都是np.array。输入 "的每个单元格都是np.array。它们的尺寸是51x7(7是通道数)(我现在只想关注数据生成器)

# General Libraries
import os
import math
import pickle
import numpy as np
import pandas as pd
from numpy import random

# Tensorflow and Keras
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import Input, Conv1D, Dense, Dropout, MaxPooling1D, Flatten

# Project Libraries
import config


class DataGenerator(Sequence):
    def __init__(self, path_experiment, batch_size, mode='train', validation_ratio=0.1):
        self.path_pickle = os.path.join(path_experiment, 'dataframe.pkl')
        self.dataframe = self.load_dataframe()
        self.batch_size = batch_size
        self.mode = mode
        self.indexes = None

        if mode == 'validation':
            self.dataframe = self.dataframe[-int(len(self.dataframe) * validation_ratio):].copy()
        else:
            self.dataframe = self.dataframe[:-int(len(self.dataframe) * validation_ratio)].copy()

        self.name_targets = [col for col in self.dataframe if col.startswith('target')]
        self.dataframe[self.name_targets] = np.log(self.dataframe[self.name_targets])

        self.targets = self.dataframe[self.name_targets].values
        self.inputs = self.dataframe['input'].values

    def load_dataframe(self):
        with open(self.path_pickle, 'rb') as f:
            return pickle.load(f)

    def __len__(self):
        return int(math.ceil(len(self.dataframe) / float(self.batch_size)))

    def on_epoch_end(self):
        self.indexes = range(len(self.dataframe))
        if self.mode == 'train':
            # Shuffles indexes after each epoch if in training mode
            self.indexes = random.sample(self.indexes, k=len(self.indexes))

    def get_batch_labels(self, idx):
        # Fetch a batch of labels
        return self.targets[idx * self.batch_size: (idx + 1) * self.batch_size]

    def get_batch_features(self, idx):
        # Fetch a batch of inputs
        return self.inputs[idx * self.batch_size: (idx + 1) * self.batch_size]

    def __getitem__(self, idx):
        batch_x = self.get_batch_features(idx)
        batch_y = self.get_batch_labels(idx)
        # print(batch_x[0].shape)  # Display (51,7)
        # print(batch_x[0].shape)  # Display (3,)
        return batch_x, batch_y


path = os.path.join(config.path_model_input, 'experiment_2', '2')
train_generator = DataGenerator(path, batch_size=32, mode='train')
validation_generator = DataGenerator(path, batch_size=32, mode='validation')

input_shape = (51, 7)
i = Input(shape=input_shape)
x = Flatten()(i)
x = Dense(10, activation='relu')(x)
output = Dense(3, activation='linear')(x)
model = Model(inputs=i, outputs=output)

def RMSE(label, prediction):
    return tf.sqrt(tf.losses.mean_squared_error(label, prediction))

optimizer = Adam(lr=0.0001)
model.compile(optimizer=optimizer, loss=RMSE)
model.fit(train_generator, epochs=100, verbose=1, use_multiprocessing=False, workers=1)

当我试图运行这个代码时,我得到了这个错误。

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-7-6dde9c267440> in <module>
      7 optimizer = Adam(lr=0.0001)
      8 model.compile(optimizer=optimizer, loss=RMSE)
----> 9 model.fit(train_generator, epochs=100, verbose=1, use_multiprocessing=False, workers=1)#, validation_data=validation_generator)

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    817         max_queue_size=max_queue_size,
    818         workers=workers,
--> 819         use_multiprocessing=use_multiprocessing)
    820 
    821   def evaluate(self,

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)
    233           max_queue_size=max_queue_size,
    234           workers=workers,
--> 235           use_multiprocessing=use_multiprocessing)
    236 
    237       total_samples = _get_total_number_of_samples(training_data_adapter)

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in _process_training_inputs(model, x, y, batch_size, epochs, sample_weights, class_weights, steps_per_epoch, validation_split, validation_data, validation_steps, shuffle, distribution_strategy, max_queue_size, workers, use_multiprocessing)
    591         max_queue_size=max_queue_size,
    592         workers=workers,
--> 593         use_multiprocessing=use_multiprocessing)
    594     val_adapter = None
    595     if validation_data:

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in _process_inputs(model, mode, x, y, batch_size, epochs, sample_weights, class_weights, shuffle, steps, distribution_strategy, max_queue_size, workers, use_multiprocessing)
    704       max_queue_size=max_queue_size,
    705       workers=workers,
--> 706       use_multiprocessing=use_multiprocessing)
    707 
    708   return adapter

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\data_adapter.py in __init__(self, x, y, sample_weights, standardize_function, shuffle, workers, use_multiprocessing, max_queue_size, **kwargs)
    950         use_multiprocessing=use_multiprocessing,
    951         max_queue_size=max_queue_size,
--> 952         **kwargs)
    953 
    954   @staticmethod

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\data_adapter.py in __init__(self, x, y, sample_weights, standardize_function, workers, use_multiprocessing, max_queue_size, **kwargs)
    765 
    766     if standardize_function is not None:
--> 767       dataset = standardize_function(dataset)
    768 
    769     if kwargs.get("shuffle", False) and self.get_size() is not None:

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in standardize_function(dataset)
    682           return x, y
    683         return x, y, sample_weights
--> 684       return dataset.map(map_fn, num_parallel_calls=dataset_ops.AUTOTUNE)
    685 
    686   if mode == ModeKeys.PREDICT:

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\data\ops\dataset_ops.py in map(self, map_func, num_parallel_calls)
   1589     else:
   1590       return ParallelMapDataset(
-> 1591           self, map_func, num_parallel_calls, preserve_cardinality=True)
   1592 
   1593   def flat_map(self, map_func):

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\data\ops\dataset_ops.py in __init__(self, input_dataset, map_func, num_parallel_calls, use_inter_op_parallelism, preserve_cardinality, use_legacy_function)
   3924         self._transformation_name(),
   3925         dataset=input_dataset,
-> 3926         use_legacy_function=use_legacy_function)
   3927     self._num_parallel_calls = ops.convert_to_tensor(
   3928         num_parallel_calls, dtype=dtypes.int32, name="num_parallel_calls")

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\data\ops\dataset_ops.py in __init__(self, func, transformation_name, dataset, input_classes, input_shapes, input_types, input_structure, add_to_graph, use_legacy_function, defun_kwargs)
   3145       with tracking.resource_tracker_scope(resource_tracker):
   3146         # TODO(b/141462134): Switch to using garbage collection.
-> 3147         self._function = wrapper_fn._get_concrete_function_internal()
   3148 
   3149         if add_to_graph:

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\eager\function.py in _get_concrete_function_internal(self, *args, **kwargs)
   2393     """Bypasses error checking when getting a graph function."""
   2394     graph_function = self._get_concrete_function_internal_garbage_collected(
-> 2395         *args, **kwargs)
   2396     # We're returning this concrete function to someone, and they may keep a
   2397     # reference to the FuncGraph without keeping a reference to the

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\eager\function.py in _get_concrete_function_internal_garbage_collected(self, *args, **kwargs)
   2387       args, kwargs = None, None
   2388     with self._lock:
-> 2389       graph_function, _, _ = self._maybe_define_function(args, kwargs)
   2390     return graph_function
   2391 

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\eager\function.py in _maybe_define_function(self, args, kwargs)
   2701 
   2702       self._function_cache.missed.add(call_context_key)
-> 2703       graph_function = self._create_graph_function(args, kwargs)
   2704       self._function_cache.primary[cache_key] = graph_function
   2705       return graph_function, args, kwargs

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\eager\function.py in _create_graph_function(self, args, kwargs, override_flat_arg_shapes)
   2591             arg_names=arg_names,
   2592             override_flat_arg_shapes=override_flat_arg_shapes,
-> 2593             capture_by_value=self._capture_by_value),
   2594         self._function_attributes,
   2595         # Tell the ConcreteFunction to clean up its graph once it goes out of

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\framework\func_graph.py in func_graph_from_py_func(name, python_func, args, kwargs, signature, func_graph, autograph, autograph_options, add_control_dependencies, arg_names, op_return_value, collections, capture_by_value, override_flat_arg_shapes)
    976                                           converted_func)
    977 
--> 978       func_outputs = python_func(*func_args, **func_kwargs)
    979 
    980       # invariant: `func_outputs` contains only Tensors, CompositeTensors,

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\data\ops\dataset_ops.py in wrapper_fn(*args)
   3138           attributes=defun_kwargs)
   3139       def wrapper_fn(*args):  # pylint: disable=missing-docstring
-> 3140         ret = _wrapper_helper(*args)
   3141         ret = structure.to_tensor_list(self._output_structure, ret)
   3142         return [ops.convert_to_tensor(t) for t in ret]

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\data\ops\dataset_ops.py in _wrapper_helper(*args)
   3080         nested_args = (nested_args,)
   3081 
-> 3082       ret = autograph.tf_convert(func, ag_ctx)(*nested_args)
   3083       # If `func` returns a list of tensors, `nest.flatten()` and
   3084       # `ops.convert_to_tensor()` would conspire to attempt to stack

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\autograph\impl\api.py in wrapper(*args, **kwargs)
    235       except Exception as e:  # pylint:disable=broad-except
    236         if hasattr(e, 'ag_error_metadata'):
--> 237           raise e.ag_error_metadata.to_exception(e)
    238         else:
    239           raise

ValueError: in converted code:

    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py:677 map_fn
        batch_size=None)
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training.py:2410 _standardize_tensors
        exception_prefix='input')
    C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training_utils.py:573 standardize_input_data
        'with shape ' + str(data_shape))

    ValueError: Error when checking input: expected input_1 to have 3 dimensions, but got array with shape (None, 1)

我在这个问题上通过了相当长的时间,我似乎找不到这个错误的原因,我检查了几次所有的输入输出的尺寸,我看不到我所犯的错误.任何帮助将是非常感激!

EDIT:print(self.inputs.shape) 还有 print(self.targets.shape) 分别给出(1101,)和(1101,3)。

我试着做了 self.dataframe['input']=self.dataframe['input'].apply(lambda x: np.array(x)) 但它仍然返回(1101,)而不是(1101,57,3)

我还检查了列输入中所有数组的形状。

train_generator.dataframe['input_shape']=train_generator.dataframe['input'].apply(lambda x: x.shape)
train_generator.dataframe['input_shape'].unique()

它返回的是: array([(51, 7)], dtype=object)我不知道它返回 "对象 "作为列的类型是否正常。

python tensorflow keras
1个回答
1
投票

解决方案在这个线程中。(详情请看)

将pandas列的numpy数组转换为更高维度的numpy数组。

原来我原来想用dataframe做的事情不太可能。而我却用了.NET Framework。self.inputs = np.stack(self.dataframe['input'].to_numpy())


0
投票

一定是你的 dataframe 这意味着存储的文件存在一些问题,导致加载时形状不匹配。

为了验证这就是问题所在,我保持你的模型创建和拟合代码完全一样,但写一个测试生成器也和你原来的代码几乎一样,只是数据是在飞行中生成的,这正确的形状。

class DataGenerator(Sequence):
    def __init__(self, path_experiment, batch_size, mode='train', validation_ratio=0.1):
        self.batch_size = batch_size
        self.mode = mode
        self.indexes = None

        self.inputs =  np.random.random([batch_size,51,7] )
        self.targets = np.random.random([batch_size,3] )


    def __len__(self):
        return int((51*7) / float(self.batch_size))

    def get_batch_labels(self, idx):
        # Fetch a batch of labels
        return self.targets[idx * self.batch_size: (idx + 1) * self.batch_size]

    def get_batch_features(self, idx):
        # Fetch a batch of inputs
        return self.inputs[idx * self.batch_size: (idx + 1) * self.batch_size]

    def __getitem__(self, idx):
        batch_x = self.get_batch_features(idx)
        batch_y = self.get_batch_labels(idx)
        # print(batch_x[0].shape)  # Display (51,7)
        # print(batch_x[0].shape)  # Display (3,)
        return batch_x, batch_y

path=''
train_generator = DataGenerator(path, batch_size=32, mode='train')
validation_generator = DataGenerator(path, batch_size=32, mode='validation')

在这里,我确保输入的是形状为 [batch, 51,7]. 运行你的模型创建代码和适合,它的工作原理

Epoch 1/100
11/11 [==============================] - 0s 1ms/step - loss: 0.3640
Epoch 2/100
11/11 [==============================] - 0s 884us/step - loss: 0.3508
Epoch 3/100
11/11 [==============================] - 0s 1ms/step - loss: 0.3350

问题一定在 self.dataframe['input'].values. 增加 print(self.dataframe['input'].values.shape) 看看是什么

© www.soinside.com 2019 - 2024. All rights reserved.