Click here to Skip to main content
15,880,392 members
Articles / Artificial Intelligence / ChatGPT

Chat GPT plus Eleven Labs in JavaScript

Rate me:
Please Sign up or sign in to vote.
5.00/5 (6 votes)
19 Mar 2023CPOL1 min read 15.6K   339   12   5
Web app to talk to Chat GPT using Eleven Labs text-to-speech
In this article, I have attempted to create a web page to talk to Chat GPT with speech-to-text and Eleven Labs text-to-speech browser capabilities. The goal is to make Chat GPT talk with a human like voice.

Introduction

This application is my attempt to create a client app to talk to Chat GPT in JavaScript. My goal is to demonstrate how to use Chat GPT API with speech-to-text and Eleven Labs text-to-speech capabilities. This means that you can talk to your browser and your browser will talk back to you with a human like voice.Image 1

Background

This article is a sequel to my previous article Chat GPT in JavaScript.

Using the Code

  1. Get OPENAI_API_KEY from https://beta.openai.com/account/api-keys.
  2. Open ChatGPT_11Labs.js and add the API Key key to the first line.
  3. Get ELEVEN_LABS_API_KEY from https://beta.elevenlabs.io/speech-synthesis > Profile > API Key.
  4. Open ChatGPT_11Labs.js and add the API Key key to the second line.

Here is the code. Basically, it uses XMLHttpRequest to post JSON to OpenAI's endpoint and then posts the answer from Chat GPT to Eleven Labs endpoint.

Code for ChatGpt_11Labs.js

JavaScript
var OPENAI_API_KEY = "";
var ELEVEN_LABS_API_KEY = "";
var sVoiceId = "21m00Tcm4TlvDq8ikWAM"; //Rachel
var bSpeechInProgress = false;
var oSpeechRecognizer = null

function OnLoad() {

    if ("webkitSpeechRecognition" in window) {
    } else {
        //speech to text not supported
        lblSpeak.style.display = "none";
    }

    GetVoiceList();
}

function ChangeLang(o) {
    if (oSpeechRecognizer) {
        oSpeechRecognizer.lang = selLang.value;
        //SpeechToText()
    }
}

function GetVoiceList() {   

    var oHttp = new XMLHttpRequest();
    oHttp.open("GET", "https://api.elevenlabs.io/v1/voices");
    oHttp.setRequestHeader("Accept", "application/json");
    oHttp.setRequestHeader("Content-Type", "application/json");
    oHttp.setRequestHeader("xi-api-key", ELEVEN_LABS_API_KEY)

    oHttp.onreadystatechange = function () {
        if (oHttp.readyState === 4) {

            var oJson = { voices: []};

            try {
                oJson = JSON.parse(oHttp.responseText);
            } catch (ex) {
                txtOutput.value += "Error: " + ex.message
            }

            for (var i = 0; i < oJson.voices.length; i++) {
                selVoices.options[selVoices.length] = new Option(oJson.voices[i].name, oJson.voices[i].voice_id);
            };
        }
    };

    oHttp.send();
}

function SayIt() {
    var s = txtMsg.value;
    if (s == "") {
        txtMsg.focus();
        return;
    }

    TextToSpeech(s);
}

function TextToSpeech(s) {
    if (chkMute.checked) return;

    if (selVoices.length > 0 && selVoices.selectedIndex != -1) {
        sVoiceId = selVoices.value;
    }

    spMsg.innerHTML = "Eleven labs text-to-speech...";

    var oHttp = new XMLHttpRequest();
    oHttp.open("POST", "https://api.elevenlabs.io/v1/text-to-speech/" + sVoiceId);
    oHttp.setRequestHeader("Accept", "audio/mpeg");
    oHttp.setRequestHeader("Content-Type", "application/json");
    oHttp.setRequestHeader("xi-api-key", ELEVEN_LABS_API_KEY)

    oHttp.onload = function () {
        if (oHttp.readyState === 4) {

            spMsg.innerHTML = "";

            var oBlob = new Blob([this.response], { "type": "audio/mpeg" });
            var audioURL = window.URL.createObjectURL(oBlob);
            var audio = new Audio();
            audio.src = audioURL;
            audio.play();
        }
    };

    var data = {
        text: s,
        voice_settings: { stability: 0, similarity_boost: 0 }
    };

    oHttp.responseType = "arraybuffer";
    oHttp.send(JSON.stringify(data));
}

function SetModels() {
    selModel.length = 0;

    var oHttp = new XMLHttpRequest();
    oHttp.open("GET", "https://api.openai.com/v1/models");
    oHttp.setRequestHeader("Accept", "application/json");
    oHttp.setRequestHeader("Content-Type", "application/json");
    oHttp.setRequestHeader("Authorization", "Bearer " + OPENAI_API_KEY);

    oHttp.onreadystatechange = function () {
        if (oHttp.readyState === 4) {

            var oJson = { voices: [] };

            try {
                oJson = JSON.parse(oHttp.responseText);
            } catch (ex) {
                txtOutput.value += "Error: " + ex.message
            }

            var l = [];

            for (var i = 0; i < oJson.data.length; i++) {
                l.push(oJson.data[i].id);                
            };

            l.sort();

            for (var i = 0; i < l.length; i++) {
                selModel.options[selModel.length] = new Option(l[i], l[i]);
            };

            for (var i = 0; i < selModel.length; i++) {
                if (selModel.options[i].value == "text-davinci-003") {
                    selModel.selectedIndex = i;
                    break;
                }
            };
        }
    };

    oHttp.send();
}

function Send() {

    var sQuestion = txtMsg.value;
    if (sQuestion == "") {
        alert("Type in your question!");
        txtMsg.focus();
        return;
    }

    spMsg.innerHTML = "Chat GPT is thinking...";

    var sUrl = "https://api.openai.com/v1/completions";
    var sModel = selModel.value;// "text-davinci-003";
    if (sModel.indexOf("gpt-3.5-turbo") != -1) {
        //https://openai.com/research/gpt-4
        sUrl = "https://api.openai.com/v1/chat/completions";
    }

    var oHttp = new XMLHttpRequest();
    oHttp.open("POST", sUrl);
    oHttp.setRequestHeader("Accept", "application/json");
    oHttp.setRequestHeader("Content-Type", "application/json");
    oHttp.setRequestHeader("Authorization", "Bearer " + OPENAI_API_KEY)

    oHttp.onreadystatechange = function () {
        if (oHttp.readyState === 4) {
            //console.log(oHttp.status);

            spMsg.innerHTML = "";

            var oJson = {}
            if (txtOutput.value != "") txtOutput.value += "\n";

            try {
                oJson = JSON.parse(oHttp.responseText);
            } catch (ex) {
                txtOutput.value += "Error: " + ex.message
            }

            if (oJson.error && oJson.error.message) {
                txtOutput.value += "Error: " + oJson.error.message;

            } else if (oJson.choices) {
                var s = "";

                if (oJson.choices[0].text) {
                    s = oJson.choices[0].text;

                } else if (oJson.choices[0].message) {
                    //GPT-4
                    s = oJson.choices[0].message.content;
                }                

                if (selLang.value != "en-US") {
                    var a = s.split("?\n");
                    if (a.length == 2) {
                        s = a[1];
                    }
                }

                if (s == "") {
                    s = "No response";
                } else {
                    txtOutput.value += "Chat GPT: " + s;
                    TextToSpeech(s);
                }
            }            
        }
    };

    var iMaxTokens = 2048;
    var sUserId = "1";
    var dTemperature = 0.5;    

    var data = {
        model: sModel,
        prompt: sQuestion,
        max_tokens: iMaxTokens,
        user: sUserId,
        temperature:  dTemperature,
        frequency_penalty: 0.0, //Number between -2.0 and 2.0  Positive value decrease the model's likelihood to repeat the same line verbatim.
        presence_penalty: 0.0,  //Number between -2.0 and 2.0. Positive values increase the model's likelihood to talk about new topics.
        stop: ["#", ";"] //Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
    }

    //chat GPT-4 gpt-4
    if (sModel.indexOf("gpt-3.5-turbo") != -1) {
        data = {
            "model": sModel,
            "messages": [
                //{
                //    "role": "system",
                //    "content": "You are a helpful assistant."  assistant messages help store prior responses
                //},
                {
                    "role": "user", //system,user,assistant
                    "content": sQuestion
                }
            ]
        }
    }

    oHttp.send(JSON.stringify(data));

    if (txtOutput.value != "") txtOutput.value += "\n";
    txtOutput.value += "Me: " + sQuestion;
    txtMsg.value = "";
}

function Mute(b) {
    if (b) {
        selVoices.style.display = "none";
    } else {
        selVoices.style.display = "";
    }
}

function SpeechToText() {

    if (oSpeechRecognizer) {

        if (chkSpeak.checked) {
            oSpeechRecognizer.start();
        } else {
            oSpeechRecognizer.stop();
        }

        return;
    }    

    oSpeechRecognizer = new webkitSpeechRecognition();
    oSpeechRecognizer.continuous = true;
    oSpeechRecognizer.interimResults = true;
    oSpeechRecognizer.lang = selLang.value;
    oSpeechRecognizer.start();

    oSpeechRecognizer.onresult = function (event) {
        var interimTranscripts = "";
        for (var i = event.resultIndex; i < event.results.length; i++) {
            var transcript = event.results[i][0].transcript;

            if (event.results[i].isFinal) {
                txtMsg.value = transcript;
                Send();
            } else {
                transcript.replace("\n", "<br>");
                interimTranscripts += transcript;
            }

            var oDiv = document.getElementById("idText");
            oDiv.innerHTML = '<span style="color: #999;">' + interimTranscripts + '</span>';
        }
    };

    oSpeechRecognizer.onerror = function (event) {

    };
}

Code for the HTML page ChatGpt_11Labs.html

HTML
<!DOCTYPE html>
<html>
<head>
    <title>Chat GPT Plus Eleven Labs</title>
    <script src="ChatGpt_11Labs.js?v=6"></script>
</head>
<body onload="OnLoad()">

    <div id="idContainer">
        <textarea id="txtOutput" rows="10" style="margin-top: 10px; width: 100%;" placeholder="Output"></textarea>

        <div>
            <button type="button" onclick="Send()" id="btnSend">Send</button>
            <button type="button" onclick="SayIt()" style="display: none">Say It</button>
            <label id="lblSpeak"><input id="chkSpeak" type="checkbox" onclick="SpeechToText()" />Listen</label>
            <label id="lblMute"><input id="chkMute" type="checkbox" onclick="Mute(this.checked)" />Mute</label>

            <select id="selModel">
                <option value="text-davinci-003">text-davinci-003</option>
                <option value="text-davinci-002">text-davinci-002</option>
                <option value="code-davinci-002">code-davinci-002</option>
                <option value="gpt-3.5-turbo">gpt-3.5-turbo</option>
                <option value="gpt-3.5-turbo-0301">gpt-3.5-turbo-0301</option>
            </select>

            <button type="button" onclick="SetModels()" id="btnSetModels" title="Load all models">...</button>

            <select id="selLang" onchange="ChangeLang(this)">
                <option value="en-US">English (United States)</option>
                <option value="fr-FR">French (France)</option>
                <option value="ru-RU">Russian (Russia)</option>
                <option value="pt-BR">Portuguese (Brazil)</option>
                <option value="es-ES">Spanish (Spain)</option>
                <option value="de-DE">German (Germany)</option>
                <option value="it-IT">Italian (Italy)</option>
                <option value="pl-PL">Polish (Poland)</option>
                <option value="nl-NL">Dutch (Netherlands)</option>
            </select>

            <select id="selVoices"></select>

            <span id="spMsg"></span>
        </div>

        <textarea id="txtMsg" rows="5" wrap="soft" style="width: 98%; margin-left: 3px; margin-top: 6px" placeholder="Input Text"></textarea>

        <div id="idText"></div>
    </div>

</body>
</html>

Points of Interest

Not all browsers support speech-to-text. Chrome and Edge seem to support it while Firefox does not. This is why the Listen checkbox will be hidden in Firefox.

History

  • 20th March, 2023: Version 1 created
  • May 24. 2023, Chat GPT4 support

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Web Developer
United States United States
Igor is a business intelligence consultant working in Tampa, Florida. He has a BS in Finance from University of South Carolina and Masters in Information Management System from University of South Florida. He also has following professional certifications: MCSD, MCDBA, MCAD.

Comments and Discussions

 
QuestionFunction to view Chars left on EvenLabs Pin
Marcelo Mozzi29-Mar-23 6:41
Marcelo Mozzi29-Mar-23 6:41 
QuestionOther languages at ElevenLabs Pin
Marcelo Mozzi27-Mar-23 2:05
Marcelo Mozzi27-Mar-23 2:05 
AnswerRe: Other languages at ElevenLabs Pin
Igor Krupitsky27-Mar-23 8:01
mvaIgor Krupitsky27-Mar-23 8:01 
QuestionIs it free Pin
Alen Toma19-Mar-23 20:09
Alen Toma19-Mar-23 20:09 
AnswerRe: Is it free Pin
Igor Krupitsky20-Mar-23 4:01
mvaIgor Krupitsky20-Mar-23 4:01 

General General    News News    Suggestion Suggestion    Question Question    Bug Bug    Answer Answer    Joke Joke    Praise Praise    Rant Rant    Admin Admin   

Use Ctrl+Left/Right to switch messages, Ctrl+Up/Down to switch threads, Ctrl+Shift+Left/Right to switch pages.