ttsm / app.py
trysem's picture
Create app.py
7a2a004 verified
import gradio as gr
import torch
import numpy as np
from transformers import pipeline
# 1. Pick the compute device: the first CUDA GPU when torch reports one,
#    otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
print(f"Loading aoxo/swaram model on {device}...")

# 2. Build the text-to-speech pipeline once at import time so that every
#    request reuses the same loaded model.
try:
    synthesizer = pipeline("text-to-speech", model="aoxo/swaram", device=device)
    print("Model loaded successfully!")
except Exception as load_error:
    # Best-effort startup: report the failure and leave a None sentinel so
    # the rest of the module can still be imported and the UI can come up.
    print(f"Error loading model: {load_error}")
    synthesizer = None
# 3. Define the prediction function
def generate_audio(text):
    """Synthesize speech for ``text`` using the module-level ``synthesizer``.

    Args:
        text: The text to speak. ``None``, non-string values and
            empty or whitespace-only strings are rejected with a status
            message instead of raising.

    Returns:
        A ``(audio, status)`` tuple. ``audio`` is
        ``(sampling_rate, samples)`` on success and ``None`` on any
        failure; ``status`` is a human-readable message.
    """
    # Check the type before calling .strip(): a None value (e.g. a null
    # payload from a programmatic caller) would otherwise raise
    # AttributeError instead of producing a status message.
    if not isinstance(text, str) or not text.strip():
        return None, "Please enter some text."

    # The loader leaves None behind when the model could not be created.
    if synthesizer is None:
        return None, "Error: Model failed to load. Check Space logs."

    try:
        # The result is read as a mapping with "audio" and "sampling_rate".
        speech = synthesizer(text)
        # Coerce to an ndarray so the dimensionality check below is valid
        # even if the audio arrives as a plain sequence.
        audio_data = np.asarray(speech["audio"])
        sample_rate = speech["sampling_rate"]

        # Drop singleton axes so that e.g. a (1, N) array becomes (N,).
        if audio_data.ndim > 1:
            audio_data = np.squeeze(audio_data)

        return (sample_rate, audio_data), "Success!"
    except Exception as e:
        # UI boundary: report the failure in the status field rather than
        # letting the exception propagate to the caller.
        return None, f"Generation Error: {e}"
# 4. Build the Gradio interface
with gr.Blocks(title="Swaram Malayalam TTS", theme=gr.themes.Soft()) as demo:
    # Page header. The Markdown source is kept flush-left so the string
    # carries no leading indentation.
    gr.Markdown(
        """
# 🗣️ Swaram Malayalam Text-to-Speech
Enter Malayalam text below to generate speech using the `aoxo/swaram` model.
"""
    )

    with gr.Row():
        # Left column: text entry, action buttons and clickable examples.
        with gr.Column():
            text_input = gr.Textbox(
                lines=5,
                label="Enter Malayalam Text",
                placeholder="മലയാളം ടൈപ്പ് ചെയ്യുക...",
            )

            with gr.Row():
                clear_btn = gr.Button("Clear")
                generate_btn = gr.Button("Generate Speech", variant="primary")

            gr.Examples(
                label="Examples",
                inputs=[text_input],
                examples=[
                    ["നമസ്കാരം, ഇതെന്റെ പുതിയ ശബ്ദമാണ്."],
                    ["കേരളം ദൈവത്തിന്റെ സ്വന്തം നാടാണ്."],
                    ["കള്ളാ കടയാടി മോനെ"],
                ],
            )

        # Right column: read-only result widgets.
        with gr.Column():
            audio_output = gr.Audio(
                type="numpy",
                label="Generated Audio",
                interactive=False,
            )
            status_output = gr.Textbox(label="Status", interactive=False)

    # Event listeners
    # "Generate Speech" runs the synthesis function and fills both result
    # widgets; api_name registers the event under the name "synthesize".
    generate_btn.click(
        generate_audio,
        inputs=[text_input],
        outputs=[audio_output, status_output],
        api_name="synthesize",
    )

    # "Clear" resets the input box, the audio player and the status line,
    # in the same order as the outputs list.
    clear_btn.click(
        lambda: (None, None, ""),
        inputs=[],
        outputs=[text_input, audio_output, status_output],
    )

# 5. Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()