我跟着 YouTube 上的一个教程学习,该教程演示了如何对 2 个类别(咳嗽、非咳嗽)的音频数据进行分类。但现在我需要再增加一个类别——打喷嚏,因此需要训练 3 个类别(咳嗽、打喷嚏、其他),我不知道该怎么做。请帮忙!
在下面的代码中,模型在 2 个类别(咳嗽、非咳嗽 not_cough)上训练,表现相当不错,但我无法让它适用于多个类别(例如咳嗽、打喷嚏、其他)。
import os
from matplotlib import pyplot as plt
import tensorflow as tf
import tensorflow_io as tfio
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPool2D, Dropout,TimeDistributed, Reshape
from tensorflow.keras.optimizers.legacy import Adam
from keras import layers
from keras.utils import to_categorical
def load_wav_16k_mono(filename):
    """Read a WAV file and return it as a mono waveform resampled to 16 kHz.

    Args:
        filename: Path of the WAV file, handed to ``tf.io.read_file``.

    Returns:
        The decoded waveform with its channel axis squeezed away, after
        resampling from the file's own sample rate to 16000 Hz.
    """
    # Read the encoded bytes and decode them, forcing a single channel.
    audio, native_rate = tf.audio.decode_wav(
        tf.io.read_file(filename), desired_channels=1
    )
    # Drop the trailing (channel) axis left over from decoding.
    mono = tf.squeeze(audio, axis=-1)
    # The resampler is given the source rate as an int64 tensor.
    rate_in = tf.cast(native_rate, dtype=tf.int64)
    return tfio.audio.resample(mono, rate_in=rate_in, rate_out=16000)
def preprocess(file_path, label):
    """Convert one audio file into a fixed-length magnitude spectrogram.

    Args:
        file_path: Path of the WAV file to load via ``load_wav_16k_mono``.
        label: Label associated with the file; passed through unchanged.

    Returns:
        A ``(spectrogram, label)`` tuple. The spectrogram is the absolute
        value of the STFT with a trailing axis of size 1 appended.
    """
    clip_len = 8000
    # Keep at most the first `clip_len` samples of the clip.
    samples = load_wav_16k_mono(file_path)[:clip_len]
    # Left-pad shorter clips with zeros so every clip has `clip_len` samples.
    pad = tf.zeros([clip_len] - tf.shape(samples), dtype=tf.float32)
    samples = tf.concat([pad, samples], 0)
    stft = tf.signal.stft(samples, frame_length=100, frame_step=20)
    magnitude = tf.abs(stft)
    # Append a channel axis, as expected by the Conv2D layers.
    return tf.expand_dims(magnitude, axis=2), label
def get_CNN(input_shape, num_classes=2):
    """Build and compile a small 2-D CNN classifier for spectrogram inputs.

    Args:
        input_shape: Shape of a single input sample, e.g. ``(396, 65, 1)``.
        num_classes: Number of target classes. With the default of 2 the
            model ends in a single sigmoid unit trained with binary
            cross-entropy on 0/1 labels. With more than 2 classes it ends in
            ``num_classes`` softmax units trained with categorical
            cross-entropy, which expects one-hot encoded labels.

    Returns:
        The compiled ``Sequential`` model.

    Raises:
        ValueError: If ``num_classes`` is smaller than 2.
    """
    if num_classes < 2:
        raise ValueError(f"num_classes must be >= 2, got {num_classes}")

    model = Sequential()
    model.add(Conv2D(16, (3, 3), activation='relu', input_shape=input_shape))
    model.add(Conv2D(16, (3, 3), activation='relu'))
    model.add(MaxPool2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))

    if num_classes == 2:
        # A softmax over a single unit is always exactly 1.0, so the binary
        # head must use a sigmoid to produce a usable probability.
        model.add(Dense(1, activation='sigmoid'))
        loss = 'BinaryCrossentropy'
    else:
        # One softmax unit per class; labels must be one-hot vectors.
        model.add(Dense(num_classes, activation='softmax'))
        loss = 'categorical_crossentropy'

    model.compile(
        'Adam',
        loss=loss,
        metrics=[
            tf.keras.metrics.Recall(),
            tf.keras.metrics.Precision(),
            'accuracy',
        ],
    )
    # TODO: drop in some more max-pool layers to reduce the parameter count.
    model.summary()
    return model
def main():
    """Train the cough / not-cough classifier on the WAV files under ./data.

    Builds a labelled ``tf.data`` pipeline from the two class directories,
    splits it 70/30 by batch into train and test sets, and fits the CNN
    returned by ``get_CNN`` for 2 epochs.
    """
    pos_dir = "./data/cough"
    neg_dir = "./data/not_cough"

    # os.path.join keeps the glob portable; a hard-coded '\' separator only
    # works on Windows and '\*' is an invalid escape sequence.
    pos_cough = tf.data.Dataset.list_files(os.path.join(pos_dir, "*.wav"))
    neg_cough = tf.data.Dataset.list_files(os.path.join(neg_dir, "*.wav"))

    # Positive samples are labelled 1 and negative samples 0. Labelling both
    # classes with ones would leave the model nothing to discriminate.
    cough_labels = tf.data.Dataset.from_tensor_slices(tf.ones(len(pos_cough)))
    not_cough_labels = tf.data.Dataset.from_tensor_slices(
        tf.zeros(len(neg_cough))
    )

    # Attach labels and join positive and negative samples.
    cough = tf.data.Dataset.zip((pos_cough, cough_labels))
    not_cough = tf.data.Dataset.zip((neg_cough, not_cough_labels))
    data = cough.concatenate(not_cough)

    # Build the input pipeline.
    data = data.map(preprocess)
    data = data.cache()
    # Freeze the shuffle order: take()/skip() below are re-evaluated every
    # epoch, so a per-epoch reshuffle would leak test samples into training.
    data = data.shuffle(buffer_size=1000, reshuffle_each_iteration=False)
    data = data.batch(16)
    data = data.prefetch(8)

    # Split into train (first 70% of batches) and test (all the rest).
    # Taking "the rest" avoids losing a batch to float rounding.
    train_batches = int(len(data) * 0.7)
    train = data.take(train_batches)
    test = data.skip(train_batches)

    input_shape_spectrogram = (396, 65, 1)
    model = get_CNN(input_shape_spectrogram)
    model.fit(train, epochs=2, validation_data=test)
首先,您的数据集中需要有 3 个类别,这意味着您需要像区分咳嗽/非咳嗽样本那样,把打喷嚏的样本单独区分出来。然后,您需要将标签转换为独热(one-hot)编码向量,即除了与类别索引对应的元素为 1 外,其余元素均为 0。例如,如果规定非咳嗽 = 0、咳嗽 = 1、打喷嚏 = 2,那么打喷嚏的样本标签必须是 [0, 0, 1],咳嗽的样本必须是 [0, 1, 0],非咳嗽的样本必须是 [1, 0, 0]。最后,您的输出层应该有 3 个神经元,并且损失函数应相应地改为 categorical crossentropy:
# Output layer for the 3-class case: one softmax unit per class
# (not-cough = 0, cough = 1, sneeze = 2), replacing the single-unit output.
# NOTE(review): pair this with one-hot labels and a categorical
# cross-entropy loss — confirm against the model's compile() call.
model.add(Dense(3, activation='softmax'))