Code hacks that work around the inconsistent chunk stream order in the Text to Speech integration (the order is still not fully consistent):

ElevenLabs API

// Eleven Labs API
// Replace the placeholder with your own key; it is sent in the `xi-api-key` header.
const API_KEY = 'YOUR_API_KEY'
const options = {method: 'GET', headers: {'xi-api-key': API_KEY}};

// Escapes a value so it can be interpolated into HTML text or a double-quoted attribute.
function escapeHtml(value) {
  return String(value)
    .replaceAll('&', '&amp;')
    .replaceAll('<', '&lt;')
    .replaceAll('>', '&gt;')
    .replaceAll('"', '&quot;')
    .replaceAll("'", '&#39;');
}

// Builds the dropdown caption for one voice: its name followed by whichever labels are present.
function describeVoice(voice) {
  const labels = voice.labels ?? {};
  // Missing labels are skipped so the caption never contains the literal text "undefined".
  const details = [labels.accent, labels.description, labels.age, labels.gender, labels['use case']]
    .filter(Boolean)
    .join(', ');
  return details ? `${voice.name} - ${details}` : `${voice.name}`;
}

let elevenlabsVoices;

try {
  const response = await fetch('https://api.elevenlabs.io/v1/voices', options);
  if (!response.ok) {
    throw new Error(`ElevenLabs voices request failed with HTTP status ${response.status}`);
  }
  const payload = await response.json();
  console.log(payload);
  elevenlabsVoices = [...payload.voices];
} catch (err) {
  console.error(err);
  // Without a voice list nothing below can work, so stop with a descriptive error
  // instead of failing later on `undefined[0]`.
  throw new Error('Could not load the ElevenLabs voice list', {cause: err});
}

console.log(elevenlabsVoices)

if (elevenlabsVoices.length === 0) {
  throw new Error('ElevenLabs returned no voices to choose from');
}

// The first voice is the default until the user picks another one in the dropdown.
window.chosenVoiceName = elevenlabsVoices[0].voice_id;

document.body.innerHTML = `
<style>
    body {
        color: white;
        font-family: system-ui, sans-serif;
    }
</style>
Please choose a voice:
<br>
<select onchange="window.chosenVoiceName=this.value;">${elevenlabsVoices.map(n => `<option value="${escapeHtml(n.voice_id)}">${escapeHtml(describeVoice(n))}</option>`).join("")}</select>
<br>
<button onclick="oc.window.hide();">submit</button>
<br><br>
(As you can see, this plugin is pretty rudimentary for now. Feel free to ask for more features on the Discord.)
`;

oc.window.show()

// Chunks received so far that have not been spoken yet; re-sorted by `index` after every push
// because chunks may arrive out of order.
let sentence = [];
oc.thread.on("StreamingMessage", async function (data) {
  // Speaks everything currently buffered, then removes the spoken chunks from the buffer.
  const speakBuffered = async () => {
    const lastItem = sentence[sentence.length - 1];
    // Strip markdown asterisks and backslashes from the raw text. (Running the text through
    // JSON.stringify first would turn newlines into a literal "n" once backslashes are removed.)
    const text = sentence.map(a => a.text).join("").replaceAll('*', '').replaceAll('\\', '');
    try {
      if (text.trim() !== '') {
        console.log("Speaking sentence:", text);
        await textToSpeech({text, voiceName: window.chosenVoiceName, lastItem});
      }
    } catch (err) {
      // A failed sentence must not abort the rest of the stream.
      console.error("Text to speech failed:", err);
    }
    // Drop everything up to and including the last chunk that was handed to textToSpeech;
    // chunks pushed while the audio was playing sit after it and are kept.
    const spokenUpTo = sentence.indexOf(lastItem);
    if (spokenUpTo !== -1) {
      sentence = sentence.slice(spokenUpTo + 1);
    }
  };

  for await (const chunk of data.chunks) {
    sentence.push(chunk);
    sentence = sentence.toSorted((a, b) => a.index - b.index);
    const text = sentence.map(a => a.text).join("");
    // Speak at a sentence boundary, or once more than 20 chunks have piled up.
    if (text.endsWith('.') || text.endsWith('!') || text.endsWith('?') || sentence.length > 20) {
      await speakBuffered();
    }
  }

  // The stream may end without terminal punctuation; speak the remainder so it is neither
  // lost nor prepended to the next message.
  if (sentence.length > 0) {
    await speakBuffered();
  }
});

/**
 * Synthesizes `text` with the ElevenLabs text-to-speech endpoint, plays the returned audio
 * and waits until playback has ended.
 *
 * @param {object} args
 * @param {string} args.text - Text to synthesize.
 * @param {string} args.voiceName - ElevenLabs voice id; it is placed in the request URL.
 * @param {*} args.lastItem - Value handed back unchanged once playback has ended.
 * @returns {Promise<*>} Resolves with `lastItem` after the audio finished playing. Rejects with
 *   an Error when the request fails, the server answers with a non-OK status, or playback fails.
 */
async function textToSpeech({text, voiceName, lastItem}) {
  const body = {
    "model_id":"eleven_multilingual_v1",
    "text": text,
    "voice_settings":{
      "similarity_boost":0.5,
      "stability":0.5
    }
  };
  const options = {
    method: 'POST',
    headers: {
      'xi-api-key': API_KEY,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify(body)
  };

  let url;
  try {
    const response = await fetch(`https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceName)}`, options);
    // An error response carries a JSON body, not audio; do not try to play it.
    if (!response.ok) {
      throw new Error(`ElevenLabs text-to-speech request failed with HTTP status ${response.status}`);
    }
    const blob = await response.blob();
    url = URL.createObjectURL(blob);

    await new Promise((resolve, reject) => {
      const audio = new Audio(url);
      audio.onended = () => resolve();
      audio.onerror = () => reject(new Error('Audio playback failed'));
      // play() returns a promise in current browsers and rejects when playback is blocked;
      // Promise.resolve also covers implementations that return undefined.
      Promise.resolve(audio.play()).catch(reject);
    });
    return lastItem;
  } catch (err) {
    console.error(err);
    throw err;
  } finally {
    // Release the blob backing the object URL once playback is over or has failed.
    if (url !== undefined) {
      URL.revokeObjectURL(url);
    }
  }
}

Speech Synthesis

// work around Chrome bug:
// poll until getVoices() returns at least one voice.
while(speechSynthesis.getVoices().length === 0) {
  await new Promise(r => setTimeout(r, 10));
}

// True when the voice name mentions "english" (case-insensitive).
const mentionsEnglish = name => name.toLowerCase().includes("english");

// English voices first. The comparator looks at both operands and returns 0 for ties, so the
// (stable) sort keeps the browser's original order inside each group.
let availableVoiceNames = speechSynthesis.getVoices()
  .map(v => v.name)
  .sort((a, b) => Number(mentionsEnglish(b)) - Number(mentionsEnglish(a)));
// The first voice is the default until the user picks another one in the dropdown.
window.chosenVoiceName = availableVoiceNames[0];

document.body.innerHTML = `
  Please choose a voice:
  <br>
  <select onchange="window.chosenVoiceName=this.value;">${availableVoiceNames.map(n => `<option>${n}</option>`).join("")}</select>
  <br>
  <button onclick="oc.window.hide();">submit</button>
  <br><br>
  (As you can see, this plugin is pretty rudimentary for now. Feel free to ask for more features on the Discord.)
`;

oc.window.show();

// Chunks received so far that have not been spoken yet; re-sorted by `index` after every push
// because chunks may arrive out of order.
let sentence = [];
// NOTE(review): `indices` is not used by the code visible here; kept in case other code reads it.
let indices = []
oc.thread.on("StreamingMessage", async function (data) {
  // Speaks everything currently buffered, then removes the spoken chunks from the buffer.
  const speakBuffered = async () => {
    const lastItem = sentence[sentence.length - 1];
    // Strip markdown asterisks and backslashes from the raw text. (Running the text through
    // JSON.stringify first would turn newlines into a literal "n" once backslashes are removed.)
    const text = sentence.map(a => a.text).join("").replaceAll('*', '').replaceAll('\\', '');
    try {
      if (text.trim() !== '') {
        console.log("Speaking sentence:", text);
        await textToSpeech({text, voiceName: window.chosenVoiceName, lastItem});
      }
    } catch (err) {
      // A failed sentence must not abort the rest of the stream.
      console.error("Text to speech failed:", err);
    }
    // Drop everything up to and including the last chunk that was handed to textToSpeech;
    // chunks pushed while the utterance was playing sit after it and are kept.
    const spokenUpTo = sentence.indexOf(lastItem);
    if (spokenUpTo !== -1) {
      sentence = sentence.slice(spokenUpTo + 1);
    }
  };

  for await (const chunk of data.chunks) {
    sentence.push(chunk);
    sentence = sentence.toSorted((a, b) => a.index - b.index);
    const text = sentence.map(a => a.text).join("");
    // Speak as soon as the buffered text ends at a sentence boundary.
    if (text.endsWith('.') || text.endsWith('!') || text.endsWith('?')) {
      await speakBuffered();
    }
  }

  // The stream may end without terminal punctuation; speak the remainder so it is neither
  // lost nor prepended to the next message.
  if (sentence.length > 0) {
    await speakBuffered();
  }
});

/**
 * Speaks `text` through the browser's `speechSynthesis` and settles when the utterance is done.
 *
 * @param {object} args
 * @param {string} args.text - Text to speak.
 * @param {string} args.voiceName - Name of the voice to use, matched against `getVoices()`.
 * @param {*} args.lastItem - Value handed back unchanged when the utterance ends.
 * @returns {Promise<*>} Resolves with `lastItem` on the utterance's `onend`; rejects with the
 *   event passed to `onerror`.
 */
function textToSpeech({text, voiceName, lastItem}) {
  return new Promise((resolve, reject) => {
    // Look the voice up by name; `voice` stays undefined when no voice matches.
    const matchingVoice = speechSynthesis.getVoices().find(candidate => candidate.name === voiceName);

    const utterance = Object.assign(new SpeechSynthesisUtterance(), {
      text,
      voice: matchingVoice,
      rate: 1.2,
      pitch: 1.0,
    });

    utterance.onend = () => {
      resolve(lastItem);
    };
    utterance.onerror = (event) => {
      reject(event);
    };

    speechSynthesis.speak(utterance);
  });
}
Edit
Pub: 09 May 2024 13:32 UTC
Edit: 09 May 2024 13:36 UTC
Views: 129