我有一个作业,需要在给定 mp3 输入文件的情况下返回以下内容。这些文件可能长达 12 小时。我已经开始使用 NAudio,并且可以得到前 3 项,但之后的一切都给我带来了很多麻烦。
我知道分贝(dB)这部分有点奇怪,它也可以是 dBFS。关于如何处理这些简单的事情,并没有很好的文档。如果有更简单的库,我也愿意使用。
我有这段代码,我试图用它来生成一系列分贝样本,但它总是返回负无穷大(-Infinity)。即使是 10 分钟的音频片段也需要很长时间才能运行。
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Reflection.PortableExecutable;
using System.Text;
using System.Threading.Tasks;
using NAudio.Wave;
using NAudio.Wave.SampleProviders;
namespace SoundScouter
{
internal class SoundLibrary
{
/* Function to generate all parts of Sound Scouter
 * Input:  strFileName - path of the audio file to analyse
 * Output: true if worked, false if the file could not be opened as audio
 */
public static bool generateAll(string strFileName)
{
    // Opening the reader doubles as the "is this an audio file" check:
    // any failure to construct it is reported as false.
    AudioFileReader objInputAudio;
    try
    {
        objInputAudio = new AudioFileReader(strFileName);
    }
    catch (Exception)
    {
        return false;
    }

    // The reader keeps the file open, so release it on every exit path.
    using (objInputAudio)
    {
        // Use the reader's own duration. The previous hand-written formula
        // divided a long by an int, which truncated to whole seconds.
        double dblLength = objInputAudio.TotalTime.TotalSeconds;

        //get start and end time
        DateTime dtStart = getStartTime(strFileName);
        DateTime dtEnd = getEndTime(dtStart, dblLength);

        // The whole decoded file is deliberately NOT read into a byte[] here.
        // That buffer was unused, its (int)Length cast overflows for long
        // recordings, and it left the stream at the end so the decibel pass
        // below had nothing left to read.
        // The decibel samples are not consumed by the report yet.
        double[] adblSamples = getDecibelSamples(objInputAudio, 1, 40);

        generateReport(strFileName, dtStart, dtEnd, dblLength);
    }
    return true;
}
/* Function to build a Sound Report and print it to the console
 * Input:  strFileName - name of the analysed file
 *         dtStart     - start time of the recording
 *         dtEnd       - end time of the recording
 *         length      - total duration in seconds
 * Output: the generated report
 */
public static SoundReport generateReport(string strFileName, DateTime dtStart, DateTime dtEnd, double length)
{
    SoundReport objReport = new SoundReport
    {
        strFileName = strFileName,
        dtStart = dtStart,
        dtEnd = dtEnd,
        dblTotalTimeSeconds = length,
        // Duration rendered as hours:minutes:seconds.
        strTotalTime = TimeSpan.FromSeconds(length).ToString(@"hh\:mm\:ss")
    };

    string strSummary = objReport.ToString();
    Console.WriteLine(strSummary);
    return objReport;
}
/* Function to find the loudest point of the audio
 * Input:  objInputAudio - opened reader; it is rewound and read to the end
 * Output: peak sample level in dBFS rounded to the nearest whole decibel
 *         (0 = full scale, negative = quieter). Returns int.MinValue when
 *         every sample is zero, because silence has no finite decibel value.
 * NOTE(review): the original body was an unfinished stub (a dangling
 * "objISP." followed by "return 1"). Peak level is assumed to be the intended
 * meaning of "max dB" - confirm against the assignment.
 */
public static int getMaxDB(AudioFileReader objInputAudio)
{
    // Scan from the start regardless of what earlier calls already consumed.
    objInputAudio.Position = 0;
    ISampleProvider objISP = objInputAudio.ToSampleProvider();

    // Roughly one second of samples per read keeps memory flat even for
    // recordings that are many hours long.
    int intBufferSize = objInputAudio.WaveFormat.SampleRate * objInputAudio.WaveFormat.Channels;
    float[] afltBuffer = new float[intBufferSize];

    float fltPeak = 0f;
    int intSamplesRead;
    while ((intSamplesRead = objISP.Read(afltBuffer, 0, afltBuffer.Length)) > 0)
    {
        for (int i = 0; i < intSamplesRead; i++)
        {
            float fltMagnitude = Math.Abs(afltBuffer[i]);
            if (fltMagnitude > fltPeak)
            {
                fltPeak = fltMagnitude;
            }
        }
    }

    // Log10(0) is negative infinity, which cannot be converted to int.
    if (fltPeak <= 0f)
    {
        return int.MinValue;
    }
    return (int)Math.Round(20 * Math.Log10(fltPeak));
}
/* Handler for a block of recorded audio
 * Input: sender - source of the event (not used)
 *        args   - carries the recorded bytes and how many of them are valid
 * Walks the block as 32 bit floating point samples and tracks the largest
 * absolute value. The result is only held in a local and is discarded when
 * the method returns.
 */
void OnDataAvailable(object sender, WaveInEventArgs args)
{
    float fltPeak = 0;
    WaveBuffer objSamples = new WaveBuffer(args.Buffer);

    // four bytes per 32 bit float sample
    int intPos = 0;
    while (intPos < args.BytesRecorded / 4)
    {
        float fltMagnitude = objSamples.FloatBuffer[intPos];
        if (fltMagnitude < 0)
        {
            fltMagnitude = -fltMagnitude;
        }
        if (fltMagnitude > fltPeak)
        {
            fltPeak = fltMagnitude;
        }
        intPos++;
    }
}
/* Function to retrieve float values of audio samples
 * Input:  objInputAudio - opened reader; read from its current position to the end
 * Output: array holding exactly the samples that were read (channels interleaved)
 * NOTE: everything is held in memory at once, so this is only suitable for
 * short clips. Process long recordings chunk by chunk instead.
 */
public static float[] getFloatSamples(AudioFileReader objInputAudio)
{
    ISampleProvider objISP = objInputAudio.ToSampleProvider();

    // Length and Position are byte counts, so convert to a sample count.
    // Sizing the array in bytes made it several times larger than needed.
    int intBytesPerSample = objInputAudio.WaveFormat.BitsPerSample / 8;
    long lngRemainingBytes = Math.Max(0L, objInputAudio.Length - objInputAudio.Position);
    float[] afltSamples = new float[lngRemainingBytes / intBytesPerSample];

    // Append each read after the previous one. Reading to offset 0 every
    // time overwrote the data already collected.
    int intTotalRead = 0;
    int intSamplesRead;
    while (intTotalRead < afltSamples.Length &&
           (intSamplesRead = objISP.Read(afltSamples, intTotalRead, afltSamples.Length - intTotalRead)) > 0)
    {
        intTotalRead += intSamplesRead;
    }

    // The reported length may exceed what was actually decoded, so drop
    // the unused tail.
    if (intTotalRead < afltSamples.Length)
    {
        Array.Resize(ref afltSamples, intTotalRead);
    }
    return afltSamples;
}
/* Function to retrieve byte values of audio samples
 * Input:  objInputAudio - opened reader; read from its current position
 *         intLength     - number of bytes wanted
 * Output: array of exactly intLength bytes. If the audio ends first, the
 *         remainder of the array is left as zeros.
 */
public static byte[] getByteSamples(AudioFileReader objInputAudio, int intLength)
{
    byte[] abytSamples = new byte[intLength];

    // A single Read may deliver fewer bytes than requested, so keep reading
    // until the buffer is full or the reader reports no more data.
    int intTotalRead = 0;
    while (intTotalRead < intLength)
    {
        int intRead = objInputAudio.Read(abytSamples, intTotalRead, intLength - intTotalRead);
        if (intRead <= 0)
        {
            break;
        }
        intTotalRead += intRead;
    }
    return abytSamples;
}
/* Function to retrieve the start time of the input file
 * Input:  strFileName - path of the input file
 * Output: the creation time the file system reports for that file
 */
public static DateTime getStartTime(string strFileName) => File.GetCreationTime(strFileName);
/* Function to retrieve the end time of the input file
 * Input:  dtStartTime - when the audio started
 *         dblLength   - duration of the audio in seconds
 * Output: the start time advanced by the duration
 */
public static DateTime getEndTime(DateTime dtStartTime, double dblLength) => dtStartTime.AddSeconds(dblLength);
/* Function to measure decibels
 * Treats the bytes as 16 bit signed PCM samples in the machine's byte order
 * (this is how BitConverter decodes them) and measures their RMS level.
 * Input:  abytBuffer - raw sample bytes
 *         intLength  - size of the window to measure, in bytes
 *         intOffset  - index of the first byte of the window
 * Output: RMS level of the window in dB relative to full scale (0 = full
 *         scale). Returns negative infinity when the window is silent or
 *         contains no complete sample.
 * Only whole samples that lie inside both the window and the buffer are
 * measured, so a window that runs past the end of the buffer is shortened
 * rather than causing an exception.
 */
public static double measureDecibels(byte[] abytBuffer, int intLength, int intOffset)
{
    if (abytBuffer == null)
    {
        throw new ArgumentNullException(nameof(abytBuffer));
    }
    if (intOffset < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(intOffset));
    }

    const int BytesPerSample = 2;

    // long arithmetic so that offset + length cannot overflow
    long lngWindowEnd = Math.Min((long)intOffset + intLength, abytBuffer.Length);

    double dblSumOfSquares = 0;
    int intSampleCount = 0;
    for (long lngPos = intOffset; lngPos + BytesPerSample <= lngWindowEnd; lngPos += BytesPerSample)
    {
        double dblSample = BitConverter.ToInt16(abytBuffer, (int)lngPos) / 32768.0;
        dblSumOfSquares += dblSample * dblSample;
        intSampleCount++;
    }

    // Divide by the number of samples actually summed. intLength / 2 was
    // wrong for odd lengths and zero for lengths below two.
    if (intSampleCount == 0)
    {
        return double.NegativeInfinity;
    }
    double dblRms = Math.Sqrt(dblSumOfSquares / intSampleCount);
    return 20 * Math.Log10(dblRms);
}
/* Function to measure the loudness of the audio over time
 * Input:  objInputAudio         - opened reader; it is rewound and read to the end
 *         intSamplesPerSecond   - how many decibel values to produce per second
 *                                 of audio (must be positive)
 *         intDecibleCalibration - offset in dB added to every value
 * Output: one value per window, in time order. Each value is the RMS level of
 *         that window in dB relative to full scale plus the calibration
 *         offset. A completely silent window yields negative infinity.
 * The audio is streamed one window at a time, so memory use does not grow
 * with the length of the recording.
 */
public static double[] getDecibelSamples(AudioFileReader objInputAudio, int intSamplesPerSecond, int intDecibleCalibration)
{
    if (intSamplesPerSecond <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(intSamplesPerSecond));
    }

    // Measure the whole file even if an earlier call already consumed the stream.
    objInputAudio.Position = 0;

    // Read decoded samples as floats. The reader's output was previously
    // reinterpreted as 16 bit integers, which does not match the
    // sample-provider view used elsewhere in this class.
    ISampleProvider objISP = objInputAudio.ToSampleProvider();

    // A window always holds whole frames (one sample per channel).
    int intChannels = objInputAudio.WaveFormat.Channels;
    int intFramesPerWindow = Math.Max(1, objInputAudio.WaveFormat.SampleRate / intSamplesPerSecond);
    float[] afltWindow = new float[intFramesPerWindow * intChannels];

    // The exact number of windows is only known once the end is reached.
    List<double> lstDecibels = new List<double>();

    while (true)
    {
        // A single Read may return less than requested, so top the window up.
        int intFilled = 0;
        int intRead;
        while (intFilled < afltWindow.Length &&
               (intRead = objISP.Read(afltWindow, intFilled, afltWindow.Length - intFilled)) > 0)
        {
            intFilled += intRead;
        }
        if (intFilled == 0)
        {
            break;
        }

        double dblSumOfSquares = 0;
        for (int i = 0; i < intFilled; i++)
        {
            double dblSample = afltWindow[i];
            dblSumOfSquares += dblSample * dblSample;
        }
        double dblRms = Math.Sqrt(dblSumOfSquares / intFilled);
        lstDecibels.Add(intDecibleCalibration + 20 * Math.Log10(dblRms));

        // A short window means the end of the audio was reached.
        if (intFilled < afltWindow.Length)
        {
            break;
        }
    }
    return lstDecibels.ToArray();
}
}
}
在我们开始之前,Mark 编写了一个示例 WaveFormRenderer 库,它可以完成您正在做的很多工作。看看吧。
无论如何,这个项目的主旨似乎是扫描文件,找出它的声音有多大,以及哪里声音最大。我经常看到的错误(我自己也不止一次犯过)是简单地寻找高样本值并将其称为“大声”。不是……或者至少不一定。令人惊讶的是,即使没有发生太多其他事情,随机的高样本值也会频繁出现:点击声、爆裂声、静电噪声或偶尔的编解码器故障。
不要检查单个样本,而是一次抓取一大块样本并对其求平均。衡量一块样本响度最有用的近似值是 RMS(均方根)。正确应用这一点将大大减少不必要的噪音对处理结果的影响。虽然计算起来可能有点麻烦,但这是值得的。您只需要根据采样率计算出一个合适的块大小,然后就可以开始了。
这里有一段简单的代码,用于生成 MP3 文件中的块的统计信息:
/// <summary>Loudness statistics for one chunk of audio, as produced by GetAudioChunks.</summary>
/// <param name="Timestamp">Offset of the chunk from the start of reading; the first chunk is at zero.</param>
/// <param name="RMS">Root-mean-square of the sample values in the chunk.</param>
/// <param name="RMSdB">The RMS value as 20 * log10(RMS); GetAudioChunks clamps it to no lower than -100.</param>
readonly record struct ChunkStats(TimeSpan Timestamp, float RMS, float RMSdB);
/// <summary>
/// Streams <paramref name="provider"/> in fixed-size chunks and yields the RMS
/// level of each chunk, so a long file can be analysed without loading it all.
/// </summary>
/// <param name="provider">Source of samples; it is read until it returns no more data.</param>
/// <param name="timePerChunk">
/// Duration of each chunk. When left at the default (zero), 50 ms is used for
/// 8000 Hz and 44100 Hz sources and 40 ms for every other sample rate.
/// </param>
/// <returns>
/// One <c>ChunkStats</c> per chunk, in order. The final chunk may be shorter
/// than the others. Because this is an iterator, nothing is read (and no
/// argument is validated) until enumeration starts.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="timePerChunk"/> is negative.</exception>
private static IEnumerable<ChunkStats> GetAudioChunks(ISampleProvider provider, TimeSpan timePerChunk = default(TimeSpan))
{
    if (timePerChunk < TimeSpan.Zero)
    {
        throw new ArgumentOutOfRangeException(nameof(timePerChunk));
    }

    int sampleRate = provider.WaveFormat.SampleRate;
    int channels = provider.WaveFormat.Channels;

    if (timePerChunk == TimeSpan.Zero)
    {
        // Pick a reasonable time based on sample rate
        timePerChunk = sampleRate switch
        {
            8000 or 44100 => TimeSpan.FromMilliseconds(50),
            _ => TimeSpan.FromMilliseconds(40)
        };
    }

    // Size the chunk in whole frames (one sample per channel) so that a frame
    // is never split across two chunks, and never let it shrink to nothing.
    int framesPerChunk = Math.Max(1, (int)Math.Round(sampleRate * timePerChunk.TotalSeconds));
    float[] buffer = new float[framesPerChunk * channels];

    long framesEmitted = 0;
    while (true)
    {
        // Read may return fewer samples than requested; top the chunk up so
        // every chunk except the last covers the same span of audio.
        int filled = 0;
        int rc;
        while (filled < buffer.Length && (rc = provider.Read(buffer, filled, buffer.Length - filled)) > 0)
        {
            filled += rc;
        }
        if (filled == 0)
        {
            yield break;
        }

        // Accumulate in double: summing thousands of squares in float loses precision.
        double sum = 0;
        for (int i = 0; i < filled; i++)
        {
            sum += (double)buffer[i] * buffer[i];
        }
        double rms = Math.Sqrt(sum / filled);

        // Simple dB calculation, clamping at -100 to avoid pesky infinities.
        double dBRMS = Math.Max(-100, 20 * Math.Log10(rms));

        // Derive the timestamp from the audio actually consumed so that it
        // cannot drift away from the real position in the stream.
        TimeSpan time = TimeSpan.FromTicks(framesEmitted * TimeSpan.TicksPerSecond / sampleRate);
        yield return new ChunkStats(time, (float)rms, (float)dBRMS);

        framesEmitted += filled / channels;
        if (filled < buffer.Length)
        {
            // a short chunk means the source is exhausted
            yield break;
        }
    }
}
它很粗糙,但它展示了您可以构建的基本思想。