我正在使用 Azure STT 服务,我的目标是禁用识别结果中的自动标点符号。
我对连续的音频流进行连续识别,代码如下:
// Create the pull-stream callback (a PullAudioInputStreamCallback subclass) that
// the recognizer reads audio from.
var audioStream = new VoiceAudioStream();
// Describe the audio written into the stream: PCM, 8000 samples/s, 16 bits per sample, 1 channel.
var audioFormat = AudioStreamFormat.GetWaveFormatPCM(8000, 16, 1);
// Bind the stream and its format into the audio configuration used by the recognizer.
var audioConfig = AudioConfig.FromStreamInput(audioStream, audioFormat);
// Build the speech configuration from the subscription key and region.
var speechConfig = SpeechConfig.FromSubscription(key, region);
speechConfig.SpeechRecognitionLanguage = "it-IT";
speechConfig.EnableDictation(); // dictation mode
// With all the configuration at hand, create the SpeechRecognizer instance.
var speechClient = new SpeechRecognizer(speechConfig, audioConfig);
// RunContinuationsAsynchronously keeps the code awaiting this task from running inline
// on whichever thread eventually completes it.
stopRecognition = new TaskCompletionSource<int>(TaskCreationOptions.RunContinuationsAsynchronously);
// Start continuous recognition.
await speechClient.StartContinuousRecognitionAsync();
// Wait asynchronously until stopRecognition is completed; Task.WaitAny would block
// the current thread for the whole recognition session.
await stopRecognition.Task;
这是我使用的、派生自 PullAudioInputStreamCallback 的类:
/// <summary>
/// Pull-audio callback that hands out the bytes pushed in through <see cref="Write"/>.
/// Data is buffered in an internal <see cref="EchoStream"/>.
/// </summary>
public class VoiceAudioStream : PullAudioInputStreamCallback
{
    private readonly EchoStream _dataStream = new();

    // Created by Close() only when buffered data is still pending; Read() signals it
    // once the buffer has been drained. Stays null otherwise.
    private ManualResetEvent _waitForEmptyDataStream = null;

    /// <summary>
    /// Copies buffered audio into <paramref name="dataBuffer"/>.
    /// </summary>
    /// <param name="dataBuffer">Destination buffer.</param>
    /// <param name="size">Maximum number of bytes requested by the caller.</param>
    /// <returns>
    /// 0 when <see cref="Close"/> is draining and no data remains; otherwise the value
    /// returned by the underlying <see cref="EchoStream"/> read.
    /// </returns>
    public override int Read(byte[] dataBuffer, uint size)
    {
        if (_waitForEmptyDataStream != null && !_dataStream.DataAvailable)
        {
            // Close() is waiting for the buffer to empty: release it and report no data.
            _waitForEmptyDataStream.Set();
            return 0;
        }

        // Honor the requested size, but never exceed the destination buffer.
        int capacity = (int)Math.Min(size, (uint)dataBuffer.Length);
        return _dataStream.Read(dataBuffer, 0, capacity);
    }

    /// <summary>
    /// Appends <paramref name="count"/> bytes of audio to the internal buffer.
    /// </summary>
    /// <param name="buffer">Source array.</param>
    /// <param name="offset">Offset passed through to the internal stream.</param>
    /// <param name="count">Number of bytes to append.</param>
    public void Write(byte[] buffer, int offset, int count)
    {
        _dataStream.Write(buffer, offset, count);
    }

    /// <summary>
    /// Waits until pending data has been consumed by <see cref="Read"/> (if any is pending),
    /// then releases the internal resources.
    /// </summary>
    public override void Close()
    {
        if (_dataStream.DataAvailable)
        {
            _waitForEmptyDataStream = new ManualResetEvent(false);
            _waitForEmptyDataStream.WaitOne();
        }

        // The event only exists when we had to wait above; the original unconditional
        // call threw NullReferenceException whenever the buffer was already empty.
        _waitForEmptyDataStream?.Close();
        _dataStream.Dispose();
        base.Close();
    }
}
这里是EchoStream类的代码:
/// <summary>
/// A stream where each <see cref="Write"/> enqueues a copy of the written bytes and
/// <see cref="Read"/> blocks until a chunk is available, then hands it out.
/// Intended for one writer and one reader running on different threads.
/// </summary>
public class EchoStream : MemoryStream
{
    private readonly ManualResetEvent _DataReady = new(false);
    private readonly ConcurrentQueue<byte[]> _Buffers = new();

    // Unread tail of a chunk that was larger than the last Read request.
    // Volatile because DataAvailable may be queried from a thread other than the reader.
    private volatile byte[] _pending;
    private int _pendingOffset;

    /// <summary>True while there are bytes that have not yet been returned by <see cref="Read"/>.</summary>
    public bool DataAvailable { get { return _pending != null || !_Buffers.IsEmpty; } }

    /// <summary>
    /// Enqueues a copy of <paramref name="count"/> bytes starting at <paramref name="offset"/>
    /// and wakes a waiting reader. Zero-length writes are ignored.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> or <paramref name="count"/> is negative.</exception>
    /// <exception cref="ArgumentException">The range exceeds the length of <paramref name="buffer"/>.</exception>
    public override void Write(byte[] buffer, int offset, int count)
    {
        ValidateRange(buffer, offset, count);
        if (count == 0)
        {
            // An empty chunk would make Read return 0 without delivering data.
            return;
        }

        // Copy from the requested offset (the original always copied from index 0).
        var chunk = new byte[count];
        Buffer.BlockCopy(buffer, offset, chunk, 0, count);
        _Buffers.Enqueue(chunk); // add new data to the queue
        _DataReady.Set();        // allow a waiting reader to proceed
    }

    /// <summary>
    /// Blocks until data is available, then copies at most <paramref name="count"/> bytes
    /// into <paramref name="buffer"/> starting at <paramref name="offset"/>.
    /// At most one queued chunk is consumed per call; a chunk larger than
    /// <paramref name="count"/> is delivered across several calls.
    /// </summary>
    /// <returns>The number of bytes copied; 0 only when <paramref name="count"/> is 0.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="offset"/> or <paramref name="count"/> is negative.</exception>
    /// <exception cref="ArgumentException">The range exceeds the length of <paramref name="buffer"/>.</exception>
    public override int Read(byte[] buffer, int offset, int count)
    {
        ValidateRange(buffer, offset, count);
        if (count == 0)
        {
            return 0;
        }

        byte[] chunk = _pending;
        int start = _pendingOffset;
        if (chunk == null)
        {
            start = 0;
            while (!_Buffers.TryDequeue(out chunk))
            {
                // Reset first, then re-check the queue: a writer that enqueued before the
                // reset is caught by the second TryDequeue, and one that enqueues after it
                // sets the event again, so no wake-up is lost.
                _DataReady.Reset();
                if (_Buffers.TryDequeue(out chunk))
                {
                    break;
                }
                _DataReady.WaitOne(); // block until there is something new to read
            }
        }

        int copied = Math.Min(count, chunk.Length - start);
        Buffer.BlockCopy(chunk, start, buffer, offset, copied);

        if (start + copied < chunk.Length)
        {
            // Keep the remainder for the next call instead of overrunning the caller's range.
            _pendingOffset = start + copied;
            _pending = chunk;
        }
        else
        {
            _pending = null;
        }

        return copied;
    }

    // Checks the (buffer, offset, count) triple shared by Read and Write.
    private static void ValidateRange(byte[] buffer, int offset, int count)
    {
        if (buffer == null)
        {
            throw new ArgumentNullException(nameof(buffer));
        }
        if (offset < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(offset));
        }
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(count));
        }
        if (buffer.Length - offset < count)
        {
            throw new ArgumentException("Offset and count exceed the length of the buffer.");
        }
    }
}
识别本身工作正常,但尽管启用了“听写”模式,自动标点仍然生效。
那么,有什么办法可以禁用自动标点符号吗?
最后我发现问题出在 Microsoft.CognitiveServices.Speech 包的 1.34.0 版本上。
更新到 1.34.1 版本后,“听写模式”就可以正常工作了。
谢谢大家。
皮尔卡洛