我正在尝试构建一个简单的深度 LSTM 神经网络。总的来说,我在机器学习方面没有太多经验,但我确实了解一些基础知识。这是我的示例模型。
// Stacked-LSTM classifier: three recurrent layers (256 -> 128 -> 128), each
// followed by dropout + batch normalization, then a small dense head ending
// in a 2-way softmax.
const model = tf.sequential();
const {layers} = tf;

// Regularization pair inserted after every recurrent layer.
const addRegularization = () => {
  model.add(layers.dropout({rate: 0.2}));
  model.add(layers.batchNormalization());
};

model.add(layers.lstm({
  units: 256,
  inputShape: [1, 6],    // [timeSteps, features]
  activation: "relu",
  returnSequences: true, // feed full sequence to the next LSTM
}));
addRegularization();

model.add(layers.lstm({units: 128, activation: "relu", returnSequences: true}));
addRegularization();

// Last recurrent layer collapses the sequence to a single vector.
model.add(layers.lstm({units: 128, activation: "relu"}));
addRegularization();

model.add(layers.dense({units: 32, activation: "relu"}));
model.add(layers.dropout({rate: 0.2}));

model.add(layers.dense({units: 2, activation: "softmax"}));

model.compile({
  // Switch to "sparseCategoricalCrossentropy" if labels are integer class ids.
  loss: "categoricalCrossentropy",
  optimizer: tf.train.adam(0.001),
  metrics: ['acc'],
});
当我尝试训练模型时,出现此错误。
// NOTE(review): input is [batch=1, timeSteps=1, features=6]; with a single
// timestep this throws the `stack` error quoted below — presumably the known
// tfjs single-timestep RNN issue, not a problem with the labels. TODO confirm.
const result = await model.fit(tf.ones([1, 1, 6]), tf.ones([1, 2]));
Error: Argument tensors passed to stack must be a `Tensor[]` or `TensorLike[]`
我在 GitHub 上发现了一个讨论这个 bug 的帖子,但我不认为这是 tfjs 的 bug:那个帖子已经是一年多前的了,如果真是 bug,我相信谷歌早就修复了。我想应该是我哪里做错了。我也在 Python 中尝试了完全相同的模型,它工作正常……但我不想使用 Python。自从转向 Node.js 之后,我很多年前就不再用 Python 了。与 Python 相比,我更喜欢 JavaScript,而且它对我来说更容易维护。你能帮我看看这里出了什么问题吗?
我认为这是 tf.js 中的一个错误,即 RNN 层不接受只有一个元素的序列(拟合时抛出错误)。根据我的理解,当使用有状态 RNN 时,一次只传递一个元素可能是有意义的(如果我错了,请纠正我)。
作为测试用的临时解决方案,我只需把输入张量重复一次,然后将结果送入 RNN 层。理论上,RNN 应该学会忽略重复的帧:
// Workaround pipeline: flatten the single frame, duplicate it along the time
// axis (n=2), then run the 2-step sequence through a GRU.
const flattened = tf.layers.flatten().apply(outputs);
const repeated = tf.layers.repeatVector({n: 2}).apply(flattened);
outputs = tf.layers.gru({units: 32}).apply(repeated);
此外,我还编写了一个自定义的有状态 GRU 层,它一次只接受一个元素(仅用于测试)。然而,我发现它和重复输入的做法相比并没有多大区别:
/**
 * Stateful single-step GRU wrapper: consumes one element per call and carries
 * the hidden state across calls (test-only workaround for single-timestep
 * RNN inputs).
 *
 * Fixes over the original draft:
 *  - `resetStates()` referenced an undefined free variable `batchSize`
 *    (ReferenceError at runtime); the batch size is now captured on the
 *    instance from `build()` or lazily from the first `call()`.
 *  - `tf.layers.gruCell` is a factory function, not a constructor, so it is
 *    invoked without `new` (the old `new` form only worked by accident
 *    because the factory returns an object).
 */
class MyGruLayer extends tf.layers.Layer {
  /** @param {Object} args - Config forwarded to tf.layers.gruCell (units, ...). */
  constructor(args) {
    super(args)
    this.cell = tf.layers.gruCell(args)
    this.states_ = null
    this.keptStates = []
    // Fixed batch size; resolved in build() or on the first call().
    this.batchSize = null
  }

  build(inputShape) {
    this.cell.build(inputShape)
    // The batch dimension may still be null/dynamic at build time; in that
    // case state creation is deferred to the first call().
    this.batchSize = inputShape[0]
    if (this.batchSize != null) {
      this.resetStates()
    }
    this.stateSpec = {shape: [null, this.cell.stateSize]}
    this.built = true
  }

  computeOutputShape(inputShape) {
    // One state-sized output vector per batch element.
    return [inputShape[0], this.cell.stateSize]
  }

  call(inputs, kwargs) {
    return tf.tidy(() => {
      const training = kwargs == null ? null : kwargs['training']
      const cellCallKwargs = {training}
      const input = inputs[0]
      // Late-bind the batch size when the model was built with a dynamic
      // batch dimension.
      if (this.batchSize == null) {
        this.batchSize = input.shape[0]
      }
      if (this.states_ == null) {
        this.resetStates()
      }
      const initialState = this.states_
      const [outputs, ...states] =
          this.cell.call([input].concat(initialState), cellCallKwargs)
      this.resetStates(states, training)
      return outputs
    })
  }

  /* Adapted from https://github.com/tensorflow/tfjs/blob/tfjs-v3.12.0/tfjs-layers/src/layers/recurrent.ts#L562
     The upstream method derives batchSize from inputSpec; here it comes from
     this.batchSize (see constructor/build/call). */
  resetStates(states, training = false) {
    const batchSize = this.batchSize
    if (batchSize == null) {
      throw new Error(
          `Layer ${this.name}: cannot reset states before the batch size is known.`)
    }
    tf.tidy(() => {
      if (this.states_ == null) {
        // First use: zero-initialize one state tensor per cell state slot.
        if (Array.isArray(this.cell.stateSize)) {
          this.states_ = this.cell.stateSize.map(dim => tf.zeros([batchSize, dim]));
        } else {
          this.states_ = [tf.zeros([batchSize, this.cell.stateSize])];
        }
      } else if (states == null) {
        // Explicit reset: dispose old state tensors.
        tf.dispose(this.states_);
        // For stateful RNNs, fully dispose kept old states.
        if (this.keptStates != null) {
          tf.dispose(this.keptStates);
          this.keptStates = [];
        }
        if (Array.isArray(this.cell.stateSize)) {
          this.states_ = this.cell.stateSize.map(dim => tf.zeros([batchSize, dim]));
        } else {
          this.states_[0] = tf.zeros([batchSize, this.cell.stateSize]);
        }
      } else {
        // Advance: install the new states produced by the cell.
        if (training === true) {
          // Keep the old states alive during training so gradients can flow.
          this.keptStates.push(this.states_.slice());
        } else {
          tf.dispose(this.states_);
        }
        for (let index = 0; index < this.states_.length; ++index) {
          const value = states[index];
          const dim = Array.isArray(this.cell.stateSize) ?
              this.cell.stateSize[index] :
              this.cell.stateSize;
          const expectedShape = [batchSize, dim];
          if (value.shape[0] != batchSize || value.shape[1] != dim) {
            throw new Error(
                `State ${index} is incompatible with layer ${this.name}: ` +
                `expected shape=${expectedShape}, received shape=${
                    value.shape}`);
          }
          this.states_[index] = value;
        }
      }
      // Clone + keep so the states survive this tidy() scope.
      this.states_ = this.states_.map(state => tf.keep(state.clone()));
    })
  }

  static get className() {
    return 'MyGruLayer';
  }
}
tf.serialization.registerClass(MyGruLayer)
// Example: outputs = new MyGruLayer({units: 32}).apply(outputs)
我有完全相同的问题,仍在寻找解决方案