语音合成 API 在说出单词时突出显示

Question

目前，我正在制作一个简单的应用程序，其中使用语音合成 API 朗读文本。我想把正在朗读的单词突出显示（加粗）。我目前有一个非常基本的实现，使用“onboundary”事件来做到这一点。但是，我想知道是否有更好、更优雅的方法，因为我的实现依赖于一些假设。

// Shared highlighter state: the words of the text being spoken, the index of
// the word currently highlighted, and the raw text handed to the synthesizer.
var words;
var wordIdx;
var text;
var utterance = new SpeechSynthesisUtterance();
// BCP 47 uses the region subtag GB for British English; 'en-UK' is not a
// valid language tag.
utterance.lang = 'en-GB';
utterance.rate = 1;

window.onload = function(){
    var textarea = document.getElementById('textarea');
    textarea.innerText = 'This is a text area.  It is used as a simple test to check whether these words are highlighted as they are spoken using the web speech synthesis API (utterance).';

    // Start speaking: snapshot the text, split it into words and rewind the
    // highlight position.
    document.getElementById('playbtn').onclick = function(){
        text    = textarea.innerText;
        // Split on whitespace runs and drop empty entries so consecutive
        // spaces do not create phantom "words" that never get a boundary
        // event (which would shift every later highlight by one).
        words   = text.split(/\s+/).filter(Boolean);
        wordIdx = 0;

        utterance.text = text;
        speechSynthesis.speak(utterance);
    };

    // Re-render the text with the current word wrapped in <strong>.
    utterance.onboundary = function(event){
        // The boundary event is also fired for sentence boundaries; only
        // word boundaries may advance the highlighted word.
        if (event.name === 'sentence') {
            return;
        }

        // Build DOM nodes instead of concatenating HTML so characters such
        // as '<' or '&' in the text are shown literally.
        textarea.textContent = '';
        words.forEach(function(word, i){
            if (i === wordIdx) {
                var strong = document.createElement('strong');
                strong.textContent = word;
                textarea.appendChild(strong);
            } else {
                textarea.appendChild(document.createTextNode(word));
            }
            textarea.appendChild(document.createTextNode(' '));
        });

        wordIdx++;
    };
};

Answer 1

您的代码无法正常工作，但我刚刚编写了一个可以按照您想要的方式工作的示例。请打开下面的 JSFiddle 查看效果。

// Utterance shared by all handlers, plus the highlighter state: the index of
// the next word span to colour and the words of the text being spoken.
var utterance = new SpeechSynthesisUtterance();
var wordIndex = 0;
var global_words = [];
// Declared explicitly; the original created this as an implicit global,
// which throws a ReferenceError in strict mode / ES modules.
var spokenTextArray = [];
// BCP 47 uses the region subtag GB for British English; 'en-UK' is not a
// valid language tag.
utterance.lang = 'en-GB';
utterance.rate = 1;


// Start speaking the textarea content and draw one <span> per word.
document.getElementById('playbtn').onclick = function(){
    var text    = document.getElementById('textarea').value;
    // Split on whitespace runs and drop empty entries so repeated spaces or
    // newlines do not create spans that never receive a boundary event.
    var words   = text.split(/\s+/).filter(Boolean);
    global_words = words;
    // Restart highlighting from the first word on every play.
    wordIndex = 0;
    // Draw the text in a div
    drawTextInPanel(words);
    spokenTextArray = words;
    utterance.text = text;
    speechSynthesis.speak(utterance);
};

// Highlight the next word span each time the engine reaches a word boundary.
utterance.onboundary = function(event){
    // The boundary event is also fired for sentence boundaries; counting
    // those would push the highlight ahead of the spoken word.
    if (event.name === 'sentence') {
        return;
    }

    var textarea = document.getElementById('textarea');
    var word = getWordAt(textarea.value, event.charIndex);
    // Show Speaking word : x (textContent: the word is user input, not markup)
    document.getElementById("word").textContent = word;
    console.info(global_words[wordIndex]);

    // The span may be missing if more boundaries fire than words were drawn;
    // skip it explicitly instead of swallowing every error.
    var span = document.getElementById("word_span_" + wordIndex);
    if (span) {
        span.style.color = "blue";
    }

    // Advance to the span to highlight on the next boundary.
    wordIndex++;
};

// Reset the display once the whole text has been spoken.
utterance.onend = function(){
    document.getElementById("word").innerHTML = "";
    wordIndex = 0;
    document.getElementById("panel").innerHTML = "";
};

// Get the word of a string given the string and the index
/**
 * Return the whitespace-delimited word that covers a character position.
 *
 * @param {*} str - Text to search; coerced with String().
 * @param {*} pos - Character index; coerced to an unsigned 32-bit integer.
 * @returns {string} The word spanning `pos`, or an empty string when `pos`
 *     falls on whitespace that is followed by more text.
 */
function getWordAt(str, pos) {
    const source = String(str);
    const offset = Number(pos) >>> 0;

    // Start of the run of non-whitespace that ends at `offset`
    // (-1 when the character at `offset` is whitespace).
    const wordStart = source.slice(0, offset + 1).search(/\S+$/);
    // Distance from `offset` to the next whitespace (-1 when there is none).
    const gapAfter = source.slice(offset).search(/\s/);

    // No whitespace after `offset`: the word runs to the end of the string.
    return gapAfter < 0
        ? source.slice(wordStart)
        : source.slice(wordStart, gapAfter + offset);
}

/**
 * Escape the characters that are significant in HTML so arbitrary text can
 * be placed inside markup.
 *
 * @param {*} value - Value to escape; coerced with String().
 * @returns {string} The escaped text.
 */
function escapeHtml(value) {
    return String(value)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");
}

/**
 * Render every word as `<span id="word_span_N">` inside the element with
 * id "panel", replacing whatever the panel showed before.
 *
 * @param {string[]} words_array - Words to draw, in speaking order.
 */
function drawTextInPanel(words_array){
    console.log("Dibujado");
    var panel = document.getElementById("panel");
    var html = "";
    for (var i = 0; i < words_array.length; i++) {
        // Words come from user input, so escape them before building markup.
        html += '<span id="word_span_' + i + '">' + escapeHtml(words_array[i]) + '</span>&nbsp;';
    }
    // Single assignment: avoids re-parsing the panel once per word and
    // prevents duplicate span ids when the text is drawn again.
    panel.innerHTML = html;
}

请试用下面的 JSFiddle：

突出显示正在朗读的单词：SpeechSynthesis JavaScript 示例（JSFiddle）

它用蓝色突出显示div中的口语单词，您可以自定义粗体样式，但重要的是想法。

注意：请记住，

onboundary

事件仅在使用本机（本地）语音合成时才会触发。如果按照 Google 示例中的做法，把语音改成 Google 的远程语音（例如 Google UK English Male），您的代码将会失效，因为此时 SpeechSynthesis API 似乎只是播放由 Google 服务器生成的音频。

Answer 2

如果您使用 React，则可以使用

tts-react

。这是一个从 CDN 加载它的示例，但它也可以用作 NPM 包。

<!DOCTYPE html>
<html lang="en-US">
  <head>
    <title>tts-react UMD example</title>
    <script src="https://unpkg.com/react@18/umd/react.development.js"></script>
    <script src="https://unpkg.com/react-dom@18/umd/react-dom.development.js"></script>
    <script src="https://unpkg.com/@babel/standalone/babel.min.js"></script>
    <!-- NOTE(review): the original URL was mangled by e-mail obfuscation
         ("[email protected]"), losing the package name and version. The major
         version 3 used here is an assumption: confirm it ships dist/umd and
         pin the exact version you tested against. -->
    <script src="https://unpkg.com/tts-react@3/dist/umd/tts-react.min.js"></script>
  </head>
  <body>
    <div id="root"></div>
    <script type="text/babel">
      const root = ReactDOM.createRoot(document.getElementById('root'))
      const { TextToSpeech, useTts } = TTSReact
      const CustomTTS = ({ children }) => {
        const { play, ttsChildren } = useTts({ children, markTextAsSpoken: true })

        return (
          <>
            <button onClick={() => play()}>Play</button>
            <div>{ttsChildren}</div>
          </>
        )
      }

      root.render(
        <>
          <CustomTTS>
            <p>Highlight words as they are spoken.</p>
          </CustomTTS>
          <TextToSpeech size="small" markTextAsSpoken>
            <p>Highlight words as they are spoken.</p>
          </TextToSpeech>
        </>
      )
    </script>
  </body>
</html>

Answer 3

最近，我想在网络上实现文本转语音。然后我做研究：

这就是我得到的：

使用网络语音合成

当我们使用内置的浏览器网络语音合成 API 时，它是免费的，但是，它会带来各种问题。像机器人一样的声音、拼写错误等等。查看所有问题

使用音频文件

我们可以通过使用音频文件来实现良好的人声。但是，当我们想要使用音频文件执行 TTS 时，我们需要这样的转录时间戳：

// One entry per spoken segment: the text plus the start and end of that
// segment in the audio (presumably in seconds; confirm against the producer).
[
  {
    text: "hello world",
    start: 0,
    end: 1.2
  }
]

转录时间戳的生成理论上需要机器学习。

解决方案

于是我决定制作 React / Vanilla Speech Highlight 这个 npm 包，它可以把使用 Web 语音合成 API 的 TTS 与使用音频文件的 TTS 的所有优点结合起来。

您可以使用各种语音合成 API 提供商（如 ElevenLabs、Google Cloud TTS、Amazon Polly 和 Open AI）制作音频文件。

使用音频文件执行 TTS 的转录时间戳怎么样？

我制作了转录时间戳检测引擎。所以我的包可以读取音频并生成转录时间戳。

您可以在演示网站上试用该功能。

查看我对此的完整研究

示例代码

这是简单的示例代码。想要更多？请参阅演示网站私人仓库

设置荧光笔样式

文件

App.css

/* Word currently being spoken: black text on a dark orange background. */
.highlight-spoken {
  border-radius: 5px;
  background-color: #ff6f00 !important;
  color: #000 !important;
}

/* Sentence containing the spoken word: black text on a light amber background. */
.highlight-sentence {
  border-radius: 5px;
  background-color: #ffe082 !important;
  color: #000 !important;
}

代码示例

文件

App.js

import { useEffect, useMemo, useRef, useState } from "react";
import { markTheWords, useTextToSpeech } from "react-speech-highlight";

import PanelControlTTS from "./PanelControlTTS";
import "./App.css";

export default function App() {
  const text = "Some Input String";
  const textEl = useRef();
  const lang = "en-US";

  const { controlHL, statusHL, prepareHL, spokenHL } = useTextToSpeech({
    disableSentenceHL: false,
    disableWordHL: false,
    autoScroll: false,
    lang: lang,
  });

  const textHL = useMemo(() => markTheWords(text), [text]);

  return (
    <>
      <div ref={textEl}>
        <div
          dangerouslySetInnerHTML={{
            __html: textHL,
          }}
        ></div>
      </div>

      <PanelControlTTS
        isPlay={statusHL == "play" || statusHL == "calibration"}
        play={() => {
          if (statusHL == "pause") {
            controlHL.resume();
          } else {
            controlHL.play(
              textEl.current
            );
          }
        }}
        pause={controlHL.pause}
        stop={controlHL.stop}
      />
    </>
  );
}

TTS 控制示例

文件

PanelControlTTS.js

export default function PanelControlTTS({ isPlay, play, pause, stop }) {
  return (
    <>
      <button
        onClick={() => {
          if (isPlay) {
            pause();
          } else {
            play();
          }
        }}
      >
        {isPlay ? "pause" : "play"}
      </button>

      {isPlay && <button onClick={stop}>stop</button>}
    </>
  );
}

语音合成 API 在说出单词时突出显示

问题描述投票：0回答：3

3个回答

这就是我得到的：

使用网络语音合成

使用音频文件

解决方案

示例代码

设置荧光笔样式

代码示例

TTS 控制示例

最新问题

语音合成 API 在说出单词时突出显示

问题描述 投票：0回答：3

3个回答

这就是我得到的：

使用网络语音合成

使用音频文件

解决方案

示例代码

设置荧光笔样式

代码示例

TTS 控制示例

最新问题

问题描述投票：0回答：3