CoCalc -- openai

GitHub Repository: elebumm/RedditVideoMakerBot
Path: blob/master/TTS/openai_tts.py
⁴⁹³ views
1
import random
2

3
import requests
4

5
from utils import settings
6

7

8
class OpenAITTS:
    """
    A Text-to-Speech engine that uses an OpenAI-like TTS API endpoint to generate audio from text.

    Attributes:
        max_chars (int): Maximum number of characters allowed per API call.
        api_key (str): API key loaded from settings.
        api_url (str): The complete API endpoint URL, built from a base URL provided in the config.
        available_voices (list): Static list of supported voices (according to current docs).
        request_timeout (tuple): ``(connect, read)`` timeouts in seconds passed to ``requests.post``.
    """

    # Fallbacks used when the corresponding config entry is missing or empty.
    DEFAULT_BASE_URL = "https://api.openai.com/v1"
    DEFAULT_VOICE = "alloy"
    DEFAULT_MODEL = "tts-1"

    # Without a timeout, requests waits forever on a stalled connection.
    # The read timeout bounds the wait between received bytes, not the total
    # transfer time, so it is kept generous for slow speech generation.
    request_timeout = (10, 120)

    def __init__(self):
        """
        Load the API key and endpoint from the TTS section of the settings.

        Raises:
            ValueError: If no 'openai_api_key' is configured.
        """
        tts_config = settings.config["settings"]["tts"]

        # Set maximum input size based on API limits (4096 characters per request)
        self.max_chars = 4096
        self.api_key = tts_config.get("openai_api_key")
        if not self.api_key:
            raise ValueError(
                "No OpenAI API key provided in settings! Please set 'openai_api_key' in your config."
            )

        # Read the base URL from the configuration (with or without a trailing
        # slash) and append the TTS-specific path.
        self.api_url = self._build_api_url(tts_config.get("openai_api_url"))

        # Set the available voices to a static list as per OpenAI TTS documentation.
        self.available_voices = self.get_available_voices()

    @classmethod
    def _build_api_url(cls, base_url):
        """
        Build the speech endpoint URL from a configured base URL.

        Args:
            base_url: Base URL from the config; may be None or empty, in which
                case DEFAULT_BASE_URL is used.

        Returns:
            str: The base URL without surrounding whitespace or trailing
            slashes, followed by "/audio/speech".
        """
        cleaned = str(base_url).strip() if base_url else ""
        cleaned = cleaned.rstrip("/")
        if not cleaned:
            # An empty base would otherwise produce the relative URL "/audio/speech".
            cleaned = cls.DEFAULT_BASE_URL
        return cleaned + "/audio/speech"

    def get_available_voices(self):
        """
        Return a static list of supported voices for the OpenAI TTS API.

        According to the documentation, supported voices include:
            "alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"
        """
        return ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]

    def randomvoice(self):
        """
        Select and return a random voice from the available voices.
        """
        return random.choice(self.available_voices)

    def run(self, text, filepath, random_voice: bool = False):
        """
        Convert the provided text to speech and save the resulting audio to the specified filepath.

        Args:
            text (str): The input text to convert.
            filepath (str): The file path where the generated audio will be saved.
            random_voice (bool): If True, select a random voice from the available voices.

        Raises:
            RuntimeError: If the request fails, the API answers with a non-200
                status, or the audio file cannot be written. The original
                exception is attached as the cause.
        """
        tts_config = settings.config["settings"]["tts"]

        # Choose voice based on configuration or randomly if requested.
        if random_voice:
            voice = self.randomvoice()
        else:
            # Fall back to the default when the entry is missing *or* empty, so
            # that "" / None never reach the API; lower-case as expected by the API.
            voice = str(tts_config.get("openai_voice_name") or self.DEFAULT_VOICE).lower()

        # Select the model from configuration; default to 'tts-1'
        model = tts_config.get("openai_model") or self.DEFAULT_MODEL

        # Create payload for API request
        payload = {
            "model": model,
            "voice": voice,
            "input": text,
            "response_format": "mp3",  # allowed formats: "mp3", "aac", "opus", "flac", "pcm" or "wav"
        }
        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
        try:
            response = requests.post(
                self.api_url,
                headers=headers,
                json=payload,
                timeout=self.request_timeout,
            )
            if response.status_code != 200:
                raise RuntimeError(f"Error from TTS API: {response.status_code} {response.text}")
            # Write response as binary into file.
            with open(filepath, "wb") as f:
                f.write(response.content)
        except Exception as e:
            # Callers get a single exception type; chain the cause for debugging.
            raise RuntimeError(f"Failed to generate audio with OpenAI TTS API: {str(e)}") from e
92

93
Product

Resources

Company