我有一个 TFRecord 文件，其中每个样本包含如下特征（features）：
# Feature schema describing one serialized example in the TFRecord file.
features = {
# Fixed-length 17920-dim float vector (precomputed features per clip).
'features': tf.FixedLenFeature([17920], tf.float32),
# Scalar string identifier of the source video.
'video_id': tf.FixedLenFeature([], tf.string),
# Scalar int64 split index.
'split_id': tf.FixedLenFeature([], tf.int64),
# 44100 float audio samples — presumably 1 s at 44.1 kHz; confirm with writer.
'audio': tf.FixedLenFeature([44100], tf.float32)
}
我目前获得迭代器的方式是这样的:
def getiterator(batch_size, decode_parallel_calls=1, repeat=False,
                shuffle=False,
                shuffle_buffer_size=None,
                prefetch_size=None,
                prefetch_gpu_num=None):
    """Build a tf.data input pipeline over ../main.tfrecords and return the
    next-batch tensors of a one-shot iterator.

    Args:
        batch_size: examples per batch; incomplete final batches are dropped.
        decode_parallel_calls: parallelism for the parse `map`.
        repeat: if True, repeat the dataset indefinitely.
        shuffle: if True, shuffle examples each epoch.
        shuffle_buffer_size: shuffle buffer; if None, falls back to the total
            record count (requires one full pass over the file — slow).
        prefetch_size: if set, prefetch this many batches on the host.
        prefetch_gpu_num: if set, prefetch batches onto this GPU index.

    Returns:
        A dict of batched tensors (the parsed feature dict), as produced by
        `iterator.get_next()`.
    """
    filepaths = ['../main.tfrecords']
    dataset = tf.data.TFRecordDataset(filepaths)

    # Shuffle examples. NOTE: the original code shuffled twice — once with a
    # buffer sized by iterating every record in the file (an extra full read
    # of the data, mislabeled as "shuffle filepaths": TFRecordDataset yields
    # records, not paths) and once with `shuffle_buffer_size`. A single
    # shuffle before `repeat` reshuffles each epoch by default.
    if shuffle:
        if shuffle_buffer_size is None:
            # Expensive fallback: count records to get a perfect shuffle.
            shuffle_buffer_size = sum(
                1 for _ in tf.python_io.tf_record_iterator(filepaths[0]))
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)

    # Repeat (after shuffle so epoch boundaries are respected).
    if repeat:
        dataset = dataset.repeat()

    def extract_fn(data_record):
        # Parse one serialized tf.Example into a dict of dense tensors.
        feature_spec = {
            'features': tf.FixedLenFeature([17920], tf.float32),
            'video_id': tf.FixedLenFeature([], tf.string),
            'split_id': tf.FixedLenFeature([], tf.int64),
            'audio': tf.FixedLenFeature([44100], tf.float32),
        }
        return tf.parse_single_example(data_record, feature_spec)

    dataset = dataset.map(extract_fn,
                          num_parallel_calls=decode_parallel_calls)

    # Make batches; drop the remainder so shapes are static.
    dataset = dataset.batch(batch_size, drop_remainder=True)

    # Honor the prefetch parameters, which were previously accepted but
    # silently ignored.
    if prefetch_size is not None:
        dataset = dataset.prefetch(prefetch_size)
        if prefetch_gpu_num is not None:
            # NOTE(review): assumes TF >= 1.13 where prefetch_to_device
            # lives under tf.data.experimental — confirm against the
            # project's TF version.
            dataset = dataset.apply(tf.data.experimental.prefetch_to_device(
                '/device:GPU:{}'.format(prefetch_gpu_num)))

    # Get tensors.
    iterator = dataset.make_one_shot_iterator()
    return iterator.get_next()
我感兴趣的一件事是通过下采样到16000Hz来预处理音频数据。但是,我不确定该怎么做,因为我目前只获得张量。什么是好的方法?
您可以直接把下采样逻辑放进 extract_fn 中，在解析出单个样本之后、返回之前对音频张量做重采样：
def extract_fn(data_record):
    """Parse one serialized example and downsample its audio 44100 -> 16000.

    The original sketch called an undefined `do_something` placeholder; this
    version implements the resampling with plain TF ops via linear
    interpolation, so the function is actually runnable inside
    `dataset.map(...)`.

    Args:
        data_record: scalar string tensor, one serialized tf.Example.

    Returns:
        Dict with the same keys as the input schema, except 'audio' is
        replaced by a [16000] float32 tensor.
    """
    feature_spec = {
        'features': tf.FixedLenFeature([17920], tf.float32),
        'video_id': tf.FixedLenFeature([], tf.string),
        'split_id': tf.FixedLenFeature([], tf.int64),
        'audio': tf.FixedLenFeature([44100], tf.float32),
    }
    features_in = tf.parse_single_example(data_record, feature_spec)

    # Audio tensor before downsampling: [44100] float32.
    audio = features_in['audio']
    src_len = 44100
    dst_len = 16000  # presumably 1 s of audio resampled to 16 kHz — confirm

    # Fractional source positions of the output samples, then linearly
    # interpolate between the two nearest source samples.
    pos = tf.linspace(0.0, float(src_len - 1), dst_len)
    lo = tf.cast(tf.floor(pos), tf.int32)
    hi = tf.minimum(lo + 1, src_len - 1)  # clamp at the last sample
    frac = pos - tf.cast(lo, tf.float32)
    downsampled_audio = (tf.gather(audio, lo) * (1.0 - frac)
                         + tf.gather(audio, hi) * frac)
    # NOTE(review): plain linear interpolation applies no anti-aliasing
    # low-pass filter; for higher quality, resample offline (e.g. scipy or
    # librosa) before writing the TFRecords.

    # Pass every other feature through unchanged; replace only the audio.
    return {
        'features': features_in['features'],
        'video_id': features_in['video_id'],
        'split_id': features_in['split_id'],
        'audio': downsampled_audio,
    }