我正在尝试使用 Twilio 和 OpenAI，通过 Node.js 代码在用户和 AI 助手之间进行语音通话。我已经创建了 URL，现在需要实时通信：一旦 OpenAI 开始流式返回响应，Twilio 就立即把内容朗读出来，这样就不必等 OpenAI 的完整响应到达后再把语音传给用户。我已经试过了，不使用流式时运行正常，但我需要让它像打电话互相交谈那样实时运行。
Here is what I tried ---
// Dependencies and client setup for the Twilio + OpenAI voice-call controller.
const apiResponse = require("../helpers/apiResponse");
const { default: mongoose } = require("mongoose");
const twilio = require("twilio");
const http = require("http");

// Twilio credentials come from the environment; never hard-code them.
const accountSid = process.env.TWILIO_ACCOUNT_SID;
const authToken = process.env.TWILIO_AUTH_TOKEN;
// Reuse the `twilio` module required above instead of requiring it a second time.
const client = twilio(accountSid, authToken);

const { Configuration, OpenAIApi } = require("openai");
const { parse } = require("path");
const configuration = new Configuration({
  apiKey: process.env.OPENAI_API_KEY,
});
const openai = new OpenAIApi(configuration);
exports.outgoingCall = async (req, res) => {
client.calls
.create({
url: "http://192.241.128.213/voice.xml",
// twiml: '<Response><Say voice="woman">Hi, Thanks for calling. Please let me know how can I help you?</Say><Redirect method="POST">https://de13-223-178-212-10.ngrok-free.app/api/v1/voip/gather-info</Redirect></Response>',
to: "+17653990096",
from: "+14708024685",
})
.then((call) => {
console.log(call.sid);
return apiResponse.successResponseWithData(res, "Sucessful", {});
});
};
/**
* gather information about the voice call
* @param {*} req
* @param {*} res
*/
exports.gatherInfo = async function (req, res) {
console.log("gather-info");
const twiml = new twilio.twiml.VoiceResponse();
console.log("req.body", req.body);
console.log("SpeechResult", req.body.SpeechResult);
// if (req.body.SpeechResult) {
// console.log("in req.body.SpeechResult");
// // ai stream
// twiml.say("Thank you for your feedback");
// client.calls(req.body.CallSid).update({ status: "completed" });
// twiml.redirect("/api/v1/voip/gather-info");
// } else {
const gather = twiml.gather({
action: "/api/v1/voip/gather-info",
method: "POST",
partialResultCallback: "/api/v1/voip/gather-info-partial",
input: "speech",
});
if (
req.body.SpeechResult &&
req.body.SpeechResult != undefined &&
req.body.SpeechResult != ""
) {
const response = await openai.createChatCompletion(
{
model: "gpt-4",
messages: [
{
role: "system",
content:
"You are a helpful interior designer assistant. Please reply in maximum 1 line.",
},
{ role: "user", content: req.body.SpeechResult },
],
stream: true,
},
{ responseType: "stream" }
);
response.data
.on("data", (data) => {
const lines = data
.toString()
.split("\n")
.filter((line) => line.trim() !== "");
console.log(lines);
for (const line of lines) {
const message = line.replace(/^data: /, "");
if (message === "[DONE]") {
break; // Stream finished
}
try {
// console.log('message---',message)
const parsed = JSON.parse(message);
if (parsed.choices[0].delta.content != undefined) {
console.log(parsed.choices[0].delta.content);
twiml.say(parsed.choices[0].delta.content);
} else {
continue;
}
} catch (error) {
console.error(
"Could not JSON parse stream message",
message,
error
);
continue;
}
}
})
.on("end", () => {
twiml.say("Anything else you are looking for?");
twiml.redirect("/api/v1/voip/gather-info");
console.log("stream ended");
});
console.log('before promise..');
await new Promise( r=> setTimeout(r,3000));
// setTimeout(() => {
console.log("ddddddddd");
// twiml.say("Anything else you are looking for?");
// twiml.redirect("/api/v1/voip/gather-info");
// }, 1000);
// console.log("openn ai response---", response.data);
// twiml.say(response?.data?.choices[0]?.message?.content);
// twiml.redirect("/api/v1/voip/gather-info");
}
// }
res.type("text/xml");
res.send(twiml.toString());
};
/**
* gather information partial
* @param {*} req
* @param {*} res
*/
exports.gatherInfoPartial = async function (req, res) {
console.log("gather-info-partial");
const twiml = new twilio.twiml.VoiceResponse();
// console.log("req.body",req.body)
console.log("SpeechResult", req.body.StableSpeechResult);
twiml.say("can plesae let me know what the service you are looking for?");
res.type("text/xml");
res.send(twiml.toString());
};
路由文件 --
// Express router wiring for the VoIP call flow.
const express = require("express");
const router = express.Router();
const CallController = require("../controller/CallController");
// Twilio POSTs final speech results here (the <Gather> action URL).
router.post("/gather-info", CallController.gatherInfo);
// Twilio POSTs interim (partial) speech results here.
router.post("/gather-info-partial", CallController.gatherInfoPartial);
// Hit this endpoint to trigger an outbound call to the hard-coded number.
router.get("/outgoing-call", CallController.outgoingCall);
我在这个项目上研究了两个月，收集了很多资料。我可以先给你一个增强版的
gatherInfo
函数，作为更好的答案。
exports.gatherInfo = async function (req, res) {
const twiml = new twilio.twiml.VoiceResponse();
if (req.body.SpeechResult) {
try {
const response = await openai.createChatCompletion(
{
model: "gpt-4",
messages: [
{
role: "system",
content:
"You are a helpful assistant. Please reply concisely.",
},
{ role: "user", content: req.body.SpeechResult },
],
stream: true,
}
);
response.data
.on('data', (data) => {
const message = data.toString();
if (message.includes('[DONE]')) {
return; // End the stream
}
try {
const parsed = JSON.parse(message);
if (parsed.choices && parsed.choices[0].message && parsed.choices[0].message.content) {
twiml.say(parsed.choices[0].message.content);
}
} catch (parseError) {
console.error("Error parsing OpenAI response:", parseError);
}
})
.on('end', () => {
twiml.say("Is there anything else I can help with?");
twiml.redirect("/api/v1/voip/gather-info");
console.log("OpenAI stream ended.");
})
.on('error', (streamError) => {
console.error("Stream encountered an error:", streamError);
twiml.say("I'm sorry, I encountered an error.");
twiml.redirect("/api/v1/voip/gather-info");
});
} catch (openAIError) {
console.error("OpenAI API error:", openAIError);
twiml.say("I'm sorry, I'm having trouble understanding right now.");
twiml.redirect("/api/v1/voip/gather-info");
}
} else {
console.log("No speech result received.");
twiml.say("I didn't catch that. Could you please repeat?");
twiml.redirect("/api/v1/voip/gather-info");
}
res.type("text/xml");
res.send(twiml.toString());
};
主要改进包括:
Try-Catch 块: 添加了强大的错误处理功能,外部块用于 OpenAI API 错误,数据事件侦听器中的内部块用于解析错误。
改进的日志记录:对每个错误实施详细的日志记录,有助于快速识别和解决问题。
用户反馈:如果出现错误,将通过 Twilio 的 say 方法通知用户,保持参与度和清晰度。
处理无输入:系统现在可以优雅地处理未收到语音结果的情况,再次提示用户输入。
您对我使用 Twilio 等 VoIP 服务进行呼叫处理、允许更多在线处理并避免直接 Android 接口的高级设置有何指导?
VoIP 成本: 担心每分钟费用和潜在的通话质量问题,尤其是长距离连接可能会导致延迟。
TTS 和 STT 服务: 现代文本转语音和语音转文本服务的质量令人印象深刻,但每个单词的成本和潜在的数据传输延迟令人担忧。
带宽:我有一个速度在 270 到 650 Mbps 之间的 5G 互联网设置,对于此用例来说,这似乎具有成本效益,并且可能比光纤互联网更可靠。
我正在寻求以下方面的见解:
我的期待: