我正在尝试使用 Twilio 和 OpenAI，通过 Node.js 代码在用户和 AI 助手之间进行语音通话。我已经创建了 URL，现在需要实时通信：一旦 OpenAI 开始流式返回响应，Twilio 就立即把内容朗读出来，这样就不必等 OpenAI 的完整响应到达后再把语音传给用户。我已经试过了，不使用流式时运行正常，但我需要让它像打电话互相交谈那样实时运行。
Here is what I tried ---
// Dependencies and client setup for the Twilio + OpenAI voice-call controller.
const apiResponse = require("../helpers/apiResponse");
const { default: mongoose } = require("mongoose");
const twilio = require("twilio");
const http = require("http");

// Twilio credentials come from the environment; never hard-code them.
const accountSid = process.env.TWILIO_ACCOUNT_SID;
const authToken = process.env.TWILIO_AUTH_TOKEN;
// Reuse the `twilio` module required above instead of requiring it a second time.
const client = twilio(accountSid, authToken);

const { Configuration, OpenAIApi } = require("openai");
const { parse } = require("path");
const configuration = new Configuration({
  apiKey: process.env.OPENAI_API_KEY,
});
const openai = new OpenAIApi(configuration);
exports.outgoingCall = async (req, res) => {
client.calls
.create({
url: "http://192.241.128.213/voice.xml",
// twiml: '<Response><Say voice="woman">Hi, Thanks for calling. Please let me know how can I help you?</Say><Redirect method="POST">https://de13-223-178-212-10.ngrok-free.app/api/v1/voip/gather-info</Redirect></Response>',
to: "+17653990096",
from: "+14708024685",
})
.then((call) => {
console.log(call.sid);
return apiResponse.successResponseWithData(res, "Sucessful", {});
});
};
/**
* gather information about the voice call
* @param {*} req
* @param {*} res
*/
exports.gatherInfo = async function (req, res) {
console.log("gather-info");
const twiml = new twilio.twiml.VoiceResponse();
console.log("req.body", req.body);
console.log("SpeechResult", req.body.SpeechResult);
// if (req.body.SpeechResult) {
// console.log("in req.body.SpeechResult");
// // ai stream
// twiml.say("Thank you for your feedback");
// client.calls(req.body.CallSid).update({ status: "completed" });
// twiml.redirect("/api/v1/voip/gather-info");
// } else {
const gather = twiml.gather({
action: "/api/v1/voip/gather-info",
method: "POST",
partialResultCallback: "/api/v1/voip/gather-info-partial",
input: "speech",
});
if (
req.body.SpeechResult &&
req.body.SpeechResult != undefined &&
req.body.SpeechResult != ""
) {
const response = await openai.createChatCompletion(
{
model: "gpt-4",
messages: [
{
role: "system",
content:
"You are a helpful interior designer assistant. Please reply in maximum 1 line.",
},
{ role: "user", content: req.body.SpeechResult },
],
stream: true,
},
{ responseType: "stream" }
);
response.data
.on("data", (data) => {
const lines = data
.toString()
.split("\n")
.filter((line) => line.trim() !== "");
console.log(lines);
for (const line of lines) {
const message = line.replace(/^data: /, "");
if (message === "[DONE]") {
break; // Stream finished
}
try {
// console.log('message---',message)
const parsed = JSON.parse(message);
if (parsed.choices[0].delta.content != undefined) {
console.log(parsed.choices[0].delta.content);
twiml.say(parsed.choices[0].delta.content);
} else {
continue;
}
} catch (error) {
console.error(
"Could not JSON parse stream message",
message,
error
);
continue;
}
}
})
.on("end", () => {
twiml.say("Anything else you are looking for?");
twiml.redirect("/api/v1/voip/gather-info");
console.log("stream ended");
});
console.log('before promise..');
await new Promise( r=> setTimeout(r,3000));
// setTimeout(() => {
console.log("ddddddddd");
// twiml.say("Anything else you are looking for?");
// twiml.redirect("/api/v1/voip/gather-info");
// }, 1000);
// console.log("openn ai response---", response.data);
// twiml.say(response?.data?.choices[0]?.message?.content);
// twiml.redirect("/api/v1/voip/gather-info");
}
// }
res.type("text/xml");
res.send(twiml.toString());
};
/**
* gather information partial
* @param {*} req
* @param {*} res
*/
exports.gatherInfoPartial = async function (req, res) {
console.log("gather-info-partial");
const twiml = new twilio.twiml.VoiceResponse();
// console.log("req.body",req.body)
console.log("SpeechResult", req.body.StableSpeechResult);
twiml.say("can plesae let me know what the service you are looking for?");
res.type("text/xml");
res.send(twiml.toString());
};
路由文件 --
// Express router wiring for the VoIP call flow.
const express = require("express");
const router = express.Router();
const CallController = require("../controller/CallController");
// Twilio POSTs final speech results here (the <Gather> action URL).
router.post("/gather-info", CallController.gatherInfo);
// Twilio POSTs interim (partial) speech results here.
router.post("/gather-info-partial", CallController.gatherInfoPartial);
// Hit this endpoint to trigger an outbound call to the hard-coded number.
router.get("/outgoing-call", CallController.outgoingCall);
我在这个项目上研究了两个月，收集了很多资料。我可以先给你一个增强版的
gatherInfo
函数，作为更好的答案。
exports.gatherInfo = async function (req, res) {
const twiml = new twilio.twiml.VoiceResponse();
if (req.body.SpeechResult) {
try {
const response = await openai.createChatCompletion(
{
model: "gpt-4",
messages: [
{
role: "system",
content:
"You are a helpful assistant. Please reply concisely.",
},
{ role: "user", content: req.body.SpeechResult },
],
stream: true,
}
);
response.data
.on('data', (data) => {
const message = data.toString();
if (message.includes('[DONE]')) {
return; // End the stream
}
try {
const parsed = JSON.parse(message);
if (parsed.choices && parsed.choices[0].message && parsed.choices[0].message.content) {
twiml.say(parsed.choices[0].message.content);
}
} catch (parseError) {
console.error("Error parsing OpenAI response:", parseError);
}
})
.on('end', () => {
twiml.say("Is there anything else I can help with?");
twiml.redirect("/api/v1/voip/gather-info");
console.log("OpenAI stream ended.");
})
.on('error', (streamError) => {
console.error("Stream encountered an error:", streamError);
twiml.say("I'm sorry, I encountered an error.");
twiml.redirect("/api/v1/voip/gather-info");
});
} catch (openAIError) {
console.error("OpenAI API error:", openAIError);
twiml.say("I'm sorry, I'm having trouble understanding right now.");
twiml.redirect("/api/v1/voip/gather-info");
}
} else {
console.log("No speech result received.");
twiml.say("I didn't catch that. Could you please repeat?");
twiml.redirect("/api/v1/voip/gather-info");
}
res.type("text/xml");
res.send(twiml.toString());
};
主要改进包括:
Try-Catch 块: 添加了强大的错误处理功能,外部块用于 OpenAI API 错误,数据事件侦听器中的内部块用于解析错误。
改进的日志记录:对每个错误实施详细的日志记录,有助于快速识别和解决问题。
用户反馈:如果出现错误,将通过 Twilio 的 say 方法通知用户,保持参与度和清晰度。
处理无输入:系统现在可以优雅地处理未收到语音结果的情况,再次提示用户输入。
您对我使用 Twilio 等 VoIP 服务进行呼叫处理、允许更多在线处理并避免直接 Android 接口的高级设置有何指导?
VoIP 成本: 担心每分钟费用和潜在的通话质量问题,尤其是长距离连接可能会导致延迟。
TTS 和 STT 服务: 现代文本转语音和语音转文本服务的质量令人印象深刻,但每个单词的成本和潜在的数据传输延迟令人担忧。
带宽:我有一个速度在 270 到 650 Mbps 之间的 5G 互联网设置,对于此用例来说,这似乎具有成本效益,并且可能比光纤互联网更可靠。
我正在寻求以下方面的见解:
我的期待: